Source code for tile_processing.properties

# Copyright (c) Fraunhofer MEVIS, Germany. All rights reserved.
# **InsertLicense** code

# exempt this module from Python module reloading, which frequently breaks
# exception handling, isinstance checks and enum value comparisons
_mlab_do_not_reload = True # value is currently ignored, but it must be defined

import json
import os.path
from typing import Union, Optional, Dict, Tuple, Any

try:
  from pydantic.v1 import constr, conlist, conint, Field, validator, BaseModel, Extra, confloat, BaseConfig
except ImportError:
  from pydantic import constr, conlist, conint, Field, validator, BaseModel, Extra, confloat, BaseConfig
from parameter_info.utils import IndentOnlyNestedSequencesJsonEncoder, to_ParameterInfo

from .helpers import toFullVec
from .property_types import FillMode, DimensionMappings, DimensionMapping, ImageDimension, \
  StringToDimensionMappings, NumpyDataType, DimensionMappingsToString

# Number of components of a full-size vector; MLVector() uses it as both minimum and maximum list length.
ML_DIM_COUNT = 6

# Default value of the VERSION field of TileProcessorProperties.
VERSION_NUMBER = 2
# Dictionary keys that are treated specially by the property classes below:
# - VERSION_KEY is removed from the constructor arguments of TileProcessorProperties and from the ParameterInfo dict
# - COMMENT_KEY is removed from the constructor arguments of all property classes
# - INPUTS_KEY/OUTPUTS_KEY are the names of the inputs/outputs dictionaries of TileProcessorProperties
VERSION_KEY = "VERSION"
COMMENT_KEY = "__comment"
INPUTS_KEY = "inputs"
OUTPUTS_KEY = "outputs"

# Constrained integer type: values must be >= 1
Extent = conint( ge=1 )

def MLVector( dataType ):
  """
  Returns a constrained list type whose items are of type ``dataType`` and whose
  length is exactly ``ML_DIM_COUNT``.
  """
  fullLength = ML_DIM_COUNT
  return conlist( item_type=dataType, min_items=fullLength, max_items=fullLength )

# Full-size (ML_DIM_COUNT components) vector types, differing only in the constraint on their components:
MLPositiveImageVector = MLVector( Extent )               # integers >= 1
MLNonNegativeImageVector = MLVector( conint( ge=0 ) )    # integers >= 0
MLImageVector = MLVector( int )                          # arbitrary integers
MLPositiveFloatVector = MLVector( confloat( gt=1e-6 ) )  # floats > 1e-6
# String type that requires at least one character
NonEmptyString = constr( min_length=1 )

class PydanticConfig( BaseConfig ):
  """
  Pydantic configuration that the ``Config`` classes of the property models in this module derive from.
  """
  # If 'use_enum_values' is set, the dict() methods already create values (in our case strings) from the enum items.
  # That is not desired when the dict is used for anything other than json serialization, so we disable it here.
  use_enum_values = False

  # We also want to check on assignment, not only creation
  validate_assignment = True

  # Accept field types that pydantic does not know how to validate itself
  arbitrary_types_allowed = True

#---------------------------------------------------------------------------
# Reusable validators

def ensureIsValidDimensionMapping( v ) -> DimensionMappings:
  """
  Converts string representations of dimension mappings via ``StringToDimensionMappings()``.

  Accepted string representations are a single string or a non-empty tuple/list whose first
  item is a string (the items are then joined with ``", "`` first). Any other value is
  returned unchanged.
  """
  # When use_enum_values is True, DimensionMappings are sometimes passed as sequences of strings
  # rather than of DimensionMapping instances, so we cover this here
  isSequenceOfStrings = isinstance( v, ( tuple, list ) ) and v and isinstance( v[0], str )
  if isSequenceOfStrings:
    v = ", ".join( v ) # convert to proper string
  return StringToDimensionMappings( v ) if isinstance( v, str ) else v

def ensureIs( v, StrEnumDataType ):
  """
  Looks up the item named ``v`` in ``StrEnumDataType`` if ``v`` is a string.

  Non-string values are returned unchanged. A string that is not a valid item name is
  also returned unchanged, after a message naming the unknown key has been printed.
  """
  if not isinstance( v, str ):
    return v
  try:
    return StrEnumDataType[ v ]
  except KeyError:
    # Unfortunately, the validation error will not include the name of the invalid item, so I print it here,
    # before the error is raised by the subsequent validator
    print( f"Unknown key for class {StrEnumDataType.__name__}: '{v}'" )
    return v

def ensureIsImageDimension( v ) -> ImageDimension:
  """
  Applies ``ensureIs()`` with ``ImageDimension`` as the enum type and returns its result.
  """
  converted = ensureIs( v, ImageDimension )
  return converted

def ensureIsNumpyDataType( v ) -> NumpyDataType:
  """
  Applies ``ensureIs()`` with ``NumpyDataType`` as the enum type and returns its result.
  """
  converted = ensureIs( v, NumpyDataType )
  return converted

def ensureIsFillMode( v ) -> FillMode:
  """
  Applies ``ensureIs()`` with ``FillMode`` as the enum type and returns its result.
  """
  converted = ensureIs( v, FillMode )
  return converted

def ensureIsOrderedDict( v:Dict[ str, Any ] ) -> Dict[ str, Any ]:
  """
  Returns a new dict containing the items of ``v`` in ascending key order.

  This function is used as a pydantic *pre* validator, so it may be called with input that has not
  been validated yet. A value that does not provide ``items()`` is therefore returned unchanged, so
  that the subsequent field validation can report it, instead of this function failing with an
  ``AttributeError``.
  """
  try:
    items = v.items()
  except AttributeError:
    return v
  # Sort by key only, so that the values never need to be comparable
  return { key: value for key, value in sorted( items, key=lambda item: item[ 0 ] ) }


# NOTE: Do NOT remove or rename the unused 'values' and 'config' parameters, they are required for use
# as a pydantic validator!
# noinspection PyUnusedLocal
def ensureVectorHasFullRank( cls, field_value, values, config, field ) -> Any:
  """
  Passes tuple/list values through ``toFullVec()``, using the default component value that
  ``cls`` defines for the field. For the ``dimensions`` field, the value is first passed through
  ``ensureIsValidDimensionMapping()``.

  Values that are neither a tuple nor a list are returned unchanged.
  """
  if not isinstance( field_value, ( tuple, list ) ):
    return field_value
  if field.name == "dimensions":
    field_value = ensureIsValidDimensionMapping( field_value )
  defaultComponentValue = cls.getDefaultComponentValue( field.name )
  return toFullVec( field_value, defaultComponentValue )

#---------------------------------------------------------------------------

class _PropertiesBase( BaseModel ):
  """
  Base class of the property models in this module.

  Adds a ``__comment`` constructor argument that is accepted but discarded, a customized ``json()``
  serialization, conversion to a ParameterInfo dictionary and some schema-based helpers.
  """
  class Config( PydanticConfig ):
    extra = Extra.forbid  # Currently, we don't allow extra items, so that typos show up in the JSON schema. But we could change that
    # This would work once https://github.com/samuelcolvin/pydantic/pull/2745 is available, making the JSON (ParameterInfo)
    # representation nicer to read. Note that once this is changed, the FromQVariant method used by the bridge to convert
    # from python to C++ must also be adapted.
    #json_encoders = {
    #  DimensionMappings: lambda field_value: DimensionMappingsToString(field_value),
    #}

  def __init__(self, **kwargs):
    """
    Creates the model from keyword arguments. A ``__comment`` entry is allowed, but discarded.
    """
    kwargs.pop( COMMENT_KEY, None ) # comments not used, but allowed
    super().__init__(**kwargs)

  def json( self, *args, **kwargs ) -> str:
    """
    Returns a JSON representation of the object. See dict() for many of the possible parameters.

    The keyword arguments ``encoder`` (default: ``IndentOnlyNestedSequencesJsonEncoder``), ``indent``
    (default: 4) and ``sort_keys`` (default: True) are passed to ``json.dumps()``, all other arguments
    are passed to ``dict()``.
    """
    #NOTE: Because we do some custom stuff here (mostly use IndentOnlyNestedSequencesJsonEncoder by default
    #      and currently implement the hack for DimensionMappingsToString), we do not use the built-in json()
    #      method at all.
    encoder = kwargs.pop( "encoder", IndentOnlyNestedSequencesJsonEncoder )
    indent = kwargs.pop( "indent", 4 )
    sort_keys = kwargs.pop( "sort_keys", True )
    d = self.dict( *args, **kwargs )
    # TODO: When json_encoders works (see commented out section above), remove the following hack:
    for i in d.get( INPUTS_KEY, {} ).values():
      if i.get( "dimensions" ):
        i[ "dimensions" ] = DimensionMappingsToString( i[ "dimensions" ] )
    # END HACK
    return json.dumps( d, cls=encoder, indent=indent, sort_keys=sort_keys )

  def getParameterInfo( self ):
    """
    Returns the properties as a ParameterInfo dictionary.

    Items with ``None`` value and the version entry are not part of the result.
    """
    pi = self.dict( exclude_none=True )
    # The version entry need not exist: not every model declares a VERSION field, and
    # exclude_none already drops it if its value is None. Hence the default for pop().
    pi.pop( VERSION_KEY, None )
    return to_ParameterInfo( pi )

  @classmethod
  def getPropertyType( cls, propertyName: str ) -> Any:
    """
    Returns the type of the property with the given name.

    The type is taken from the ``example`` value that the property defines in the schema.
    """
    return cls.schema()[ "properties" ][ propertyName ][ "example" ].__class__

  @classmethod
  def getDefaultComponentValue( cls, propertyName: str ) -> Any:
    """
    Returns the default component value of the provided property. Use only for
    vector properties that actually define such a value (will raise otherwise).
    """
    return cls.schema()[ "properties" ][ propertyName ][ "default_component_value" ]

  @classmethod
  def getPropertyNames( cls ) -> Tuple[ str, ... ]:
    """
    Returns a tuple with all available property names.
    """
    return tuple( cls.schema()[ "properties" ].keys() )


#---------------------------------------------------------------------

class InputProperties( _PropertiesBase ):
  """
  ``InputProperties`` describe a tile processor's input. These properties define how input tiles are to be prepared.

  All properties are optional, i.e. can also have ``null``/``None`` value, in which case a default handling kicks in.
  """

  # Note: We cannot use multiline strings (""") for descriptions as this will disable formatting in the created documentation.
  # NOTE(review): The descriptions are presumably also part of the generated JSON schema (see
  # TileProcessorProperties.getJsonSchemaPath()), which may have to be regenerated after editing them - confirm.
  dataType: Optional[ NumpyDataType ] = Field(
    description=os.linesep.join( [
      "Data type for this input's image values.",
      "",
      "- Required for both, page-wise and global processing.",
      "",
      "If ``null``/``None`` or omitted, ``\"float32\"`` will be assumed.",
    ] ),
    example=NumpyDataType.float32
  )
  padding: Optional[ MLImageVector ] = Field(
    description=os.linesep.join( [
      "For page-wise processing, it defines how much to add to a corresponding output's ``tileSize`` to derive the "
      "input tile size (when applied symmetrically).",
      "",
      "- If this results in requesting regions outside the input image, the field ``fillMode`` becomes relevant.",
      "- If fewer than 6 components are specified, zeroes are appended to full size.",
      "",
      "For global processing, the parameter is currently not used.",
      "",
      "If ``null``/``None`` or omitted, ``[0, 0, 0, 0, 0, 0]`` (no padding) will be assumed.",
    ] ),
    example=[16, 16, 0, 0, 0, 0],
    default_component_value=0
  )
  fillMode: Optional[ FillMode ] = Field(
    description=os.linesep.join( [
      "Defines how to fill any undefined input areas that may be required due to ``padding`` or because "
      "the input image size is not an integer multiple of the input tile size.",
      "",
      "For global processing, the parameter is currently not used, as the input tile always has the exact "
      "size of the input image and there are no undefined regions.",
      "",
      "If ``null``/``None`` or omitted, ``Reflect`` is assumed.",
    ] ),
    example=FillMode.Reflect
  )
  fillValue: Optional[ float ] = Field(
    description=os.linesep.join( [
      "Value with which to fill undefined (but required) input areas if ``fillMode`` is ``\"FillValue\"``.",
      "",
      "For global processing, the parameter is currently not used, as the input tile always has the exact "
      "size of the input image and there are no undefined regions.",
      "",
      "If ``null``/``None`` or omitted, ``0`` is assumed.",
    ] ),
    example=0
  )
  dimensions: Optional[ Union[ DimensionMappings, str ] ] = Field(
    description=os.linesep.join( [
      "Input dimension order as expected by the processor for page-wise processing (currently not used for "
      "global processing).",
      "",
      "- May contain actual dimensions such as ``X`` or ``Y`` but also semantic placeholders such as ``CHANNEL1``, "
      "``CHANNEL2`` or ``BATCH`` to indicate which dimension order the processor (model) expects its input, "
      "and, together with ``externalDimensionForChannel1/2/Batch``, how to rearrange ML/MeVisLab input image dimensions "
      "to get the expected result.",
      "- This also defines the inverse order in which corresponding output images should be "
      "reformatted before providing them to MeVisLab.",
      "- Currently, the dimensions are only relevant for page/patch-wise processing, e.g. with ``ProcessTiles`` "
      "or ``ApplyTileProcessorPageWise``.",
      "- For convenience, you may specify the value as a comma-separated string such as ``\"X, CHANNEL1, BATCH\"``, "
      "or as a list of strings/enum items ``[ \"X\", \"CHANNEL1\", \"BATCH\" ]``, which is a bit more cumbersome, but has "
      "the advantage that items can be verified by your JSON-linter while typing using the schema.",
      "- If fewer than 6 components are specified, ``UNUSED`` is internally appended up to full size.",
      "",
      "If ``null``/``None`` or omitted, ``\"X, Y, CHANNEL1, BATCH\"`` is assumed.",
    ] ),
    example=[ DimensionMapping.X, DimensionMapping.Y,
              DimensionMapping.CHANNEL1,
              DimensionMapping.BATCH ],
    default_component_value = DimensionMapping.UNUSED
  )
  externalDimensionForChannel1: Optional[ ImageDimension ] = Field(
    description=os.linesep.join( [
      "External (input image) dimension to map to the ``CHANNEL1`` entry in ``dimensions`` (page-wise processing only).",
      "",
      "If ``null``/``None`` or omitted, ``C`` is assumed.",
    ] ),
    example=ImageDimension.C )
  externalDimensionForChannel2: Optional[ ImageDimension ] = Field(
    description=os.linesep.join( [
      "External (input image) dimension to map to the ``CHANNEL2`` entry in ``dimensions`` (page-wise processing only).",
      "",
      "If ``null``/``None`` or omitted, ``U`` is assumed.",
    ] ),
    example=ImageDimension.U )
  externalDimensionForBatch: Optional[ ImageDimension ] = Field(
    description=os.linesep.join( [
      "External (input image) dimension to map to the ``BATCH`` entry in ``dimensions`` (page-wise processing only).",
      "In page-wise/patch-based processing, the ``BATCH`` dimension is often used to combine multiple "
      "individually and independently processable items (patches) into a larger **\"batch\"** for performance reasons. "
      "It is therefore typically the last entry in ``dimensions``.",
      "",
      "If ``null``/``None`` or omitted, a suitable dimension is guessed by the application module, i.e. the largest "
      "otherwise \"unused\" dimension is chosen.",
    ] ),
    example=ImageDimension.Z )

  # Vector-valued fields given as tuple/list are extended to full size before the field validation (pre=True) ...
  _ensureVectorHasFullRank = validator( "dimensions", "padding", pre=True, allow_reuse=True )( ensureVectorHasFullRank )
  # ... while the remaining validators run after the field validation (pre=False).
  _ensureIsNumpyDataType = validator( "dataType", pre=False, allow_reuse=True )( ensureIsNumpyDataType )
  _ensureIsFillMode = validator( "fillMode", pre=False, allow_reuse=True )( ensureIsFillMode )
  _ensureIsValidDimensionMapping = validator( "dimensions", pre=False, allow_reuse=True )( ensureIsValidDimensionMapping )
  _ensureIsValidImageDimension = validator( "externalDimensionForChannel1",
                                            "externalDimensionForChannel2",
                                            "externalDimensionForBatch",
                                            pre=False, allow_reuse=True )( ensureIsImageDimension )


#---------------------------------------------------------------------

class OutputProperties( _PropertiesBase ):
  """
  ``OutputProperties`` describe a tile processor's output. These properties define, for example, the output tile
  size, its relation to a reference input and the expected value range.

  All properties are optional, i.e. can also have ``null``/``None`` value, in which case a default handling kicks in.
  """

  # NOTE(review): The descriptions are presumably also part of the generated JSON schema (see
  # TileProcessorProperties.getJsonSchemaPath()), which may have to be regenerated after editing them - confirm.
  referenceInput: Optional[ NonEmptyString ] = Field(
    description=os.linesep.join( [
      "Name of the reference input associated with this output. For an associated input/output pair, the following is assumed: ",
      "",
      "- For page-wise processing: ``input.tileSize = output.tileSize * output.stride + 2*input.padding``",
      "- Always: The output's world matrix (position, orientation and scale in the world/patient coordinate system) "
      "is derived from this input's (possibly taking into account translation and/or scaling differences because of "
      "``padding`` and/or ``stride``).",
      "",
      "If ``null``/``None`` or omitted: An arbitrary input is assumed as reference input (the first one, if sorting is stable).",
    ] ),
    example="input0"
  )
  dataType: Optional[ NumpyDataType ] = Field(
    description=os.linesep.join( [
      "Data type for this output's image values.",
      "",
      "- For global processing, the value is currently not used, because it need not be known in advance.",
      "",
      "If ``null``/``None`` or omitted, page-wise processing will assume ``\"float32\"``.",
    ] ),
    example=NumpyDataType.float32
  )
  tileSize: Optional[ MLNonNegativeImageVector ] = Field(
    description=os.linesep.join( [
      "Proposed size for output tiles to be requested.",
      "",
      "- For page-wise processing, see ``referenceInput`` documentation on how input tile size is derived from this and "
      "other parameters.",
      "- For global processing, the value is not used, because it need not be known in advance and the output tile will always "
      "have full extent.",
      "",
      "Any ``0`` entries are mapped to the full extent of the reference input image (in the corresponding dimension).",
      "",
      "If ``null``/``None`` or omitted ``[128, 128, 1, 1, 1, 1]`` will be assumed (possibly corrected "
      "according to ``tileSizeMinimum`` and ``tileSizeOffset``).",
    ] ),
    example=[ 192, 192, 3, 1, 1, 1 ],
    default_component_value=1,
  )
  tileSizeMinimum: Optional[ MLImageVector ] = Field(
    description=os.linesep.join( [
      "Minimum output tile size: ``tileSize`` must not be smaller (component-wise).",
      "",
      "- For page-wise processing, this parameter may be used (in combination with ``tileSizeOffset``) to snap unsuitable tileSize proposals "
      "to valid values by assuming that all sizes that can be expressed as ``tileSizeMinimum + n*tileSizeOffset`` "
      "(for n=0, 1, 2, 3, ...) are valid.",
      "- For global processing, the value is not used." ,
      "",
      "If ``null``/``None`` or omitted, ``[1, 1, 1, 1, 1, 1]`` will be assumed.",
    ] ),
    example=[ 16, 16, 3, 1, 1, 1 ],
    default_component_value=1,
  )
  tileSizeOffset: Optional[ MLImageVector ]= Field(
    description=os.linesep.join( [
      "(Minimum) offset between two valid ``tileSize`` values.",
      "",
      "- For page-wise processing, this parameter may be used (in combination with ``tileSizeMinimum``) to snap unsuitable tileSize proposals "
      "to valid values by assuming that all sizes that can be expressed as ``tileSizeMinimum + n*tileSizeOffset`` "
      "(for n=0, 1, 2, 3, ...) are valid.",
      "- For global processing, the value is not used.",
      "",
      "If ``null``/``None`` or omitted, ``[1, 1, 1, 1, 1, 1]`` will be assumed.",
    ] ),
    example=[4, 4, 0, 1, 1, 1],
    default_component_value=1
  )
  stride: Optional[ MLPositiveFloatVector ] = Field(
    description=os.linesep.join( [
      "\"Stride\" for the output tile, in relation to its ``referenceInput`` tile in page-wise processing.",
      "",
      "- Component values ``>1`` correspond to a \"downsampling\" operation in that dimension. E.g. a stride of 2 indicates "
      "that an input tile is twice as large as an output tile (neglecting padding).",
      "- Component values in ``]0, 1[`` correspond to an \"upsampling\" operation in that dimension. E.g. a stride of 0.5 indicates "
      "that an input tile is half as large as an output tile (neglecting padding).",
      "- See ``referenceInput`` documentation on how exactly input tile size is derived from ``stride`` and "
      "other parameters.",
      "",
      "Component values are positive floating point numbers.",
      "",
      "If ``null``/``None`` or omitted, ``[1.0, 1.0, 1.0, 1.0, 1.0, 1.0]`` will be assumed.",
    ] ),
    example=[ 0.5, 2, 1, 1, 1, 1 ],
    default_component_value=1.0
  )
  valueMinimum: Optional[ float ] = Field(
    description=os.linesep.join( [
      "Smallest possible image value (assumed). Will not be used to clamp values, but to adapt ML image properties.",
      "",
      "- In page-wise/demand-driven mode, this value is especially important, as it cannot be easily computed.",
      "- In global mode, the exact minimum value will be auto-computed if ``valueMinimum`` is not specified, so you only "
      "have to use it if for some reason you want a value that is different from the actual minimum (e.g. for classification "
      "tasks, you may want to fix the minimum to 0 (if that is your background)).",
      "- If unsure, always be conservative. Subsequent algorithms may depend on there not being any values outside of "
      "``[valueMinimum, valueMaximum]``.",
      "",
      "If ``null``/``None`` or omitted:",
      "",
      "- Page-wise processing will assume ``0``.",
      "- Global processing will compute the actual image minimum from the output tile.",
    ] ),
    example=0,
  )
  valueMaximum: Optional[ float ] = Field(
    description=os.linesep.join( [
      "Largest possible image value (assumed). Will not be used to clamp values, but to adapt ML image properties.",
      "",
      "- In page-wise/demand-driven mode, this value is especially important, as it cannot be easily computed.",
      "- In global mode, the exact maximum value will be auto-computed if ``valueMaximum`` is not specified, so you only "
      "have to use it if for some reason you want a value that is different from the actual maximum (e.g. for classification "
      "tasks, you may want to use the number of actual classes possible).",
      "- If unsure, always be conservative. Subsequent algorithms may depend on there not being any values outside of "
      "``[valueMinimum, valueMaximum]``.",
      "",
      "If ``null``/``None`` or omitted:",
      "",
      "- Page-wise processing will assume ``1``.",
      "- Global processing will compute the actual image maximum from the output tile.",
    ] ),
    example=1,
  )

  # outputDimensions: Optional[ DimensionMappings ] = Field( description="Order of the dimensions to map the processor output to", example=[ DimensionMapping.CHANNEL1, DimensionMapping.BATCH ] )
  # externalDimensionForChannel1: Optional[ ImageDimension ] = Field( description="External (input image) dimension to map to the CHANNEL1 dimension.", example=ImageDimension.C )
  # externalDimensionForChannel2: Optional[ ImageDimension ] = Field( description="External (input image) dimension to map to the CHANNEL2 dimension (if used).", example=ImageDimension.U )
  # externalDimensionForBatch: Optional[ ImageDimension ] = Field( description="External (input image) dimension to map to the BATCH dimension (if used).", example=ImageDimension.Z )

  # Disabled as it makes it hard to initialize properties step-by-step
  # @validator( "stride" )
  # def requires_reference_input( cls, field_value, values ) -> MLPositiveImageVector:
  #   if not values.get( "referenceInput" ):
  #     raise ValueError( "Usage of the 'stride' parameter requires a 'referenceInput'" )
  #   return field_value

  # Vector-valued fields given as tuple/list are extended to full size before the field validation (pre=True)
  _ensure_vector_has_full_rank = validator( "stride",
                                            "tileSize",
                                            "tileSizeMinimum",
                                            "tileSizeOffset",
                                            pre=True, allow_reuse=True )( ensureVectorHasFullRank )
  _ensureIsNumpyDataType = validator( "dataType", pre=False, allow_reuse=True )( ensureIsNumpyDataType )


#---------------------------------------------------------------------

class TileProcessorProperties( _PropertiesBase ):
  """
  Comprises all generic properties a ``TileProcessor`` can have, which is mostly relevant to
  describe its inputs/outputs.

  We differentiate between two different *application modes* for a ``TileProcessor``:

  - For a **"global"** (or **"single-tile"**) processing of the input image(s), very few of the properties are relevant.
  - For **"page-wise"** (or **"tiled"** or **"patch-based"**) processing of the input image(s), much more information
    needs to be provided so that the processor application module (e.g. ``ProcessTiles``) knows how to create the individual
    input tiles/patches/batches from the input image and put the resulting output tiles back together into a
    comprehensive output image.

  For details on the properties, see member ``inputs`` (``InputProperties``) and ``outputs`` (``OutputProperties``) documentation.
  """

  class Config( PydanticConfig ):
    title = "TileProcessorProperties"
    schema_extra = {
      '$schema': 'http://json-schema.org/draft-07/schema'
    }

  def __init__(self, **kwargs):
    """
    For convenience/testing, we allow the construction without inputs/outputs although this is not allowed by the schema.

    Missing ``inputs``/``outputs`` arguments are replaced by empty dictionaries. A ``VERSION`` argument is
    accepted, but discarded.
    """
    kwargs.pop( VERSION_KEY, None ) # version currently not used, but allowed
    if INPUTS_KEY not in kwargs:
      kwargs[ INPUTS_KEY ] = {}
    if OUTPUTS_KEY not in kwargs:
      kwargs[ OUTPUTS_KEY ] = {}
    super().__init__( **kwargs )

  VERSION: Optional[ int ] = Field( description="Version number for the properties format.", default=VERSION_NUMBER )

  inputs: Dict[ NonEmptyString, InputProperties ] = Field(
    description="Dictionary of ``InputProperties``, where the keys are the input names."
  )
  outputs: Dict[ NonEmptyString, OutputProperties ] = Field(
    description="Dictionary of ``OutputProperties``, where the keys are the output names."
  )

  # NOTE: There are some modules (e.g. SetTileProcessorProperties) that rely on reproducible sorting of Input/Output properties.
  #       This is not a problem in C++, as the std::maps used there are always auto-sorted by key.
  #       However, python dicts are sorted by insertion order, so we would like to sort them by key as well.
  #       This validator can ensure sorting on assignment of the entire dict, but not on item insertion, and
  #       it's hard to see how to work around this without using something like sortedcontainers (which is not
  #       currently part of MeVisLab).
  _ensureIsOrderedDict = validator( INPUTS_KEY, OUTPUTS_KEY, pre=True, allow_reuse=True )( ensureIsOrderedDict )

  def getInputNames( self ) -> Tuple[ str, ... ]:
    """
    Returns the keys of ``inputs`` as a tuple.

    Just for convenience to simplify porting from TileProcessor.getInputNames()
    """
    return tuple( self.inputs.keys() )

  def getOutputNames( self ) -> Tuple[ str, ... ]:
    """
    Returns the keys of ``outputs`` as a tuple.

    Just for convenience to simplify porting from TileProcessor.getOutputNames()
    """
    return tuple( self.outputs.keys() )

  @staticmethod
  def getJsonSchemaPath() -> str:
    """
    Returns the path to the latest version of the schema.

    It can be updated by running the test_properties unit tests with g_UPDATE_PROPERTIES_SCHEMA set in the
    test code.
    """
    return os.path.join( os.path.dirname( __file__ ), "schemas", "json", "properties_schema.json" )

  @staticmethod
  def getJsonSchemaDocumentationPath() -> str:
    """
    Returns the path to the latest version of the schema documentation.

    It can be updated together with the schema by running the test_properties unit tests with g_UPDATE_PROPERTIES_SCHEMA set in the
    test code, if you pip-install json_schema_for_humans first.
    """
    targetPathInPackageGroup = "Foundation/Documentation/Publish/FME_PythonToolbox_PublicSDK/html/TileProcessorProperties"
    # The target path is resolved relative to the directory five levels above the one containing this file
    return os.path.realpath( os.path.join( os.path.dirname( __file__ ), *( [ ".." ] * 5 ), *targetPathInPackageGroup.split( "/" ) ) )