deepdoctection · JaMe76 · Nov 3, 2022 · Oct 24, 2022 · Oct 24, 2022 · Oct 24, 2022
diff --git a/.github/workflows/style.yaml b/.github/workflows/style.yaml
@@ -9,6 +9,7 @@ on:
 
 jobs:
   lint:
+    if: "contains(github.event.head_commit.message, '[force ci]') || github.ref == 'refs/heads/master'"
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
@@ -36,6 +37,7 @@ jobs:
         run: make lint
 
   black:
+    if: "contains(github.event.head_commit.message, '[force ci]') || github.ref == 'refs/heads/master'"
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
@@ -56,6 +58,7 @@ jobs:
           make black
 
   isort:
+    if: "contains(github.event.head_commit.message, '[force ci]') || github.ref == 'refs/heads/master'"
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
@@ -78,6 +81,7 @@ jobs:
           if [ -n "$(git status --porcelain --untracked-files=no)" ]; then exit 1; else echo "All clear"; fi
 
   mypy:
+    if: "contains(github.event.head_commit.message, '[force ci]') || github.ref == 'refs/heads/master'"
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:

diff --git a/deepdoctection/__init__.py b/deepdoctection/__init__.py
@@ -142,8 +142,10 @@
         "LMTokenClassifier",
         "LMSequenceClassifier",
         "LanguageDetector",
+        "ImageTransformer",
         "InferenceResize",
         "D2FrcnnDetector",
+        "Jdeskewer",
         "DoctrTextlineDetector",
         "DoctrTextRecognizer",
         "FasttextLangDetector",
@@ -209,6 +211,7 @@
         "PipelineComponent",
         "PredictorPipelineComponent",
         "LanguageModelPipelineComponent",
+        "ImageTransformPipelineComponent",
         "Pipeline",
         "DetectResultGenerator",
         "SubImageLayoutService",
@@ -227,6 +230,7 @@
         "SegmentationResult",
         "TextExtractionService",
        "TextOrderService",
+        "SimpleTransformService",
     ],
     "train": ["D2Trainer", "train_d2_faster_rcnn", "LayoutLMTrainer", "train_hf_layoutlm", "train_faster_rcnn"],
     "utils": [

diff --git a/deepdoctection/datapoint/image.py b/deepdoctection/datapoint/image.py
@@ -183,11 +183,18 @@ def pdf_bytes(self, pdf_bytes: bytes) -> None:
         if not hasattr(self, "_pdf_bytes"):
             setattr(self, "_pdf_bytes", pdf_bytes)
 
-    def clear_image(self) -> None:
+    def clear_image(self, clear_bbox: bool = False) -> None:
         """
         Removes the :attr:`Image.image`. Useful, if the image must be a lightweight object.
+
+        :param clear_bbox: If set to `True` it will remove the image width and height. This is necessary,
+                           if the image is going to be replaced with a transform. It will also remove the self
+                           embedding entry, provided that there is one.
         """
         self._image = None
+        if clear_bbox:
+            self._bbox = None
+            # Pop with a default so that an image without a self embedding entry does not raise a KeyError.
+            self.embeddings.pop(self.image_id, None)
 
     def get_image(self) -> "_Img":  # type: ignore
         """

diff --git a/deepdoctection/extern/__init__.py b/deepdoctection/extern/__init__.py
@@ -29,6 +29,7 @@
 from ..utils.file_utils import tensorpack_available
 from .base import *
 from .d2detect import *
+from .deskew import *
 from .doctrocr import *
 from .fastlang import *
 from .hflayoutlm import *

diff --git a/deepdoctection/extern/base.py b/deepdoctection/extern/base.py
@@ -376,3 +376,19 @@ def possible_languages(self) -> List[ObjectTypes]:
         Returns a list of possible detectable languages
         """
         return list(self.categories.values())
+
+
+class ImageTransformer(PredictorBase):
+    """
+    Abstract base class for predictors that transform an image. Implementations receive the image as numpy array in
+    :meth:`transform` and return the transformed image as numpy array.
+    """
+
+    @abstractmethod
+    def transform(self, np_img: ImageType) -> ImageType:
+        """
+        Transform the image. Must be implemented by sub classes.
+
+        :param np_img: image as numpy array
+        :return: transformed image as numpy array
+        """
+        raise NotImplementedError
+
+    def clone(self) -> PredictorBase:
+        """
+        Create a new instance by calling the constructor of the class without arguments. Configuration that has
+        been passed to the constructor of this instance is therefore not carried over; sub classes that need it
+        must override this method.
+        """
+        return type(self)()
diff --git a/deepdoctection/extern/deskew.py b/deepdoctection/extern/deskew.py
@@ -0,0 +1,55 @@
+# -*- coding: utf-8 -*-
+# File: deskew.py
+
+# Copyright 2022 Dr. Janis Meyer. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+jdeskew estimator and rotator to deskew images: https://github.com/phamquiluan/jdeskew
+"""
+
+from typing import List
+
+from ..utils.detection_types import ImageType, Requirement
+from ..utils.file_utils import get_jdeskew_requirement, jdeskew_available
+from .base import ImageTransformer
+
+if jdeskew_available():
+    from jdeskew.estimator import get_angle
+    from jdeskew.utility import rotate
+
+
+class Jdeskewer(ImageTransformer):
+    """
+    Deskew an image following https://phamquiluan.github.io/files/paper2.pdf . It allows to determine the deskew angle
+    up to 45 degrees and provides the corresponding rotation so that text lines range horizontally.
+
+    The image is only rotated if the absolute value of the estimated angle exceeds :attr:`min_angle_rotation`.
+    """
+
+    def __init__(self, min_angle_rotation: float = 2.0):
+        """
+        :param min_angle_rotation: Threshold for the absolute value of the estimated angle. Images with an estimated
+                                   angle that does not exceed this value are returned unchanged.
+        """
+        self.name = "jdeskew_transform"
+        self.min_angle_rotation = min_angle_rotation
+
+    def transform(self, np_img: ImageType) -> ImageType:
+        """
+        Estimate the skew angle of the image and rotate the image by that angle, if its absolute value exceeds
+        :attr:`min_angle_rotation`.
+
+        :param np_img: image as numpy array
+        :return: the rotated image or the unchanged input, if the estimated angle is within the threshold
+        """
+        angle = get_angle(np_img)
+
+        # An image can be skewed in both directions. Comparing the signed angle with the threshold would never
+        # correct an image with a negative estimated angle.
+        if abs(angle) > self.min_angle_rotation:
+            return rotate(np_img, angle)
+        return np_img
+
+    def clone(self) -> "Jdeskewer":
+        """
+        Return a new instance with the same :attr:`min_angle_rotation`. The clone method of the base class calls the
+        constructor without arguments and would reset the threshold to its default.
+        """
+        return self.__class__(self.min_angle_rotation)
+
+    @classmethod
+    def get_requirements(cls) -> List[Requirement]:
+        """
+        Get a list of requirements for running the transformer
+        """
+        return [get_jdeskew_requirement()]
diff --git a/deepdoctection/pipe/__init__.py b/deepdoctection/pipe/__init__.py
@@ -33,3 +33,4 @@
 from .registry import *
 from .segment import *
 from .text import *
+from .transform import *
diff --git a/deepdoctection/pipe/base.py b/deepdoctection/pipe/base.py
@@ -26,7 +26,7 @@
 
 from ..dataflow import DataFlow, MapData
 from ..datapoint.image import Image
-from ..extern.base import ObjectDetector, PdfMiner, TextRecognizer
+from ..extern.base import ImageTransformer, ObjectDetector, PdfMiner, TextRecognizer
 from ..mapper.laylmstruct import LayoutLMFeatures
 from ..utils.context import timed_operation
 from ..utils.detection_types import JsonDict
@@ -189,6 +189,29 @@ def clone(self) -> "LanguageModelPipelineComponent":
         raise NotImplementedError
 
 
+class ImageTransformPipelineComponent(PipelineComponent, ABC):
+    """
+    Abstract pipeline component class that holds one model for transforming images. Components of this type are
+    meant to be used at the beginning of a pipeline.
+    """
+
+    def __init__(self, name: str, transform_predictor: ImageTransformer):
+        """
+        :param name: Name of the component. Will be passed to the base class
+        :param transform_predictor: An ImageTransformer for image transformation
+        """
+
+        # The predictor is assigned before the initializer of the base class runs.
+        self.transform_predictor = transform_predictor
+        super().__init__(name)
+
+    @abstractmethod
+    def clone(self) -> "ImageTransformPipelineComponent":
+        """
+        Clone an instance. Must be implemented by sub classes.
+        """
+        raise NotImplementedError
+
+
 class Pipeline(ABC):
     """
     Abstract base class for creating pipelines. Pipelines represent the framework with which documents can be processed

diff --git a/deepdoctection/pipe/transform.py b/deepdoctection/pipe/transform.py
@@ -0,0 +1,74 @@
+# -*- coding: utf-8 -*-
+# File: transform.py
+
+# Copyright 2022 Dr. Janis Meyer. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Module for transform style pipeline components. These pipeline components are used for various transforming operations
+on images (e.g. deskew, de-noising or more general GAN like operations).
+"""
+
+from ..datapoint.image import Image
+from ..extern.base import ImageTransformer
+from ..utils.detection_types import JsonDict
+from ..utils.logger import logger
+from .base import ImageTransformPipelineComponent
+
+
+class SimpleTransformService(ImageTransformPipelineComponent):
+    """
+    Pipeline component that applies an image transformer to the image of a datapoint. The transformer receives the
+    image as numpy array and returns the transformed image as numpy array. The service replaces the image of the
+    datapoint with the transform, after the stored image together with its width and height has been cleared.
+
+    This component is meant to be used at the very first stage of a pipeline. Bounding boxes of image annotations
+    that have been added by previous components will not be re-calculated in terms of the transformed image. If a
+    datapoint already has image annotations, a warning will be logged at runtime.
+    """
+
+    def __init__(self, transform_predictor: ImageTransformer):
+        """
+        :param transform_predictor: image transformer. Its name is used for building the name of the service
+        """
+        service_name = self._get_name(transform_predictor.name)
+        super().__init__(service_name, transform_predictor)
+
+    def serve(self, dp: Image) -> None:
+        """
+        Replace the image of the datapoint with its transform. Datapoints without image are left unchanged.
+
+        :param dp: datapoint
+        """
+        if dp.annotations:
+            logger.warning(
+                "%s has already received image with image annotations. These annotations will not "
+                "be transformed and might cause unexpected output in your pipeline.", self.name
+            )
+        if dp.image is None:
+            return
+        transformed = self.transform_predictor.transform(dp.image)
+        datapoint = self.dp_manager.datapoint
+        # Clearing with `True` also removes width and height, before the transformed image is assigned.
+        datapoint.clear_image(True)
+        datapoint.image = transformed
+
+    def clone(self) -> "SimpleTransformService":
+        """
+        Return a new service. The new service uses the same transform predictor instance as this one.
+        """
+        return self.__class__(self.transform_predictor)
+
+    def get_meta_annotation(self) -> JsonDict:
+        """
+        Return the meta annotation of the service. All entries are empty.
+        """
+        return {
+            "image_annotations": [],
+            "sub_categories": {},
+            "relationships": {},
+            "summaries": [],
+        }
+
+    @staticmethod
+    def _get_name(transform_name: str) -> str:
+        """
+        Build the name of the service from the name of the transformer.
+        """
+        return f"simple_transform_{transform_name}"
diff --git a/deepdoctection/utils/file_utils.py b/deepdoctection/utils/file_utils.py
@@ -415,6 +415,25 @@ def scipy_available() -> bool:
     return bool(_SCIPY_AVAILABLE)
 
 
+# jdeskew dependency. The lookup is done once at import time of this module.
+_JDESKEW_AVAILABLE = importlib.util.find_spec("jdeskew") is not None
+_JDESKEW_ERR_MSG = "jdeskew must be installed. >> pip install jdeskew"
+
+
+def jdeskew_available() -> bool:
+    """
+    Returns True if a module spec for jdeskew has been found, i.e. if jdeskew is installed
+    """
+    is_available = bool(_JDESKEW_AVAILABLE)
+    return is_available
+
+
+def get_jdeskew_requirement() -> Requirement:
+    """
+    Returns the jdeskew requirement: The package name, whether the package is available and the message to show,
+    if it is not.
+    """
+    requirement = ("jdeskew", jdeskew_available(), _JDESKEW_ERR_MSG)
+    return requirement
+
+
 # scikit-learn dependencies
 _SKLEARN_AVAILABLE = importlib.util.find_spec("sklearn") is not None
 _SKLEARN_ERR_MSG = "scikit-learn must be installed. >> pip install scikit-learn==1.0.2"

diff --git a/setup.cfg b/setup.cfg
@@ -89,6 +89,9 @@ ignore_missing_imports = True
 [mypy-doctr.*]
 ignore_missing_imports = True
 
+[mypy-jdeskew.*]
+ignore_missing_imports = True
+
 [mypy-deepdoctection.extern.tp.*]
 ignore_missing_imports = True
 ignore_errors = True

diff --git a/setup.py b/setup.py
@@ -73,6 +73,7 @@ def get_version():
     "transformers",
     "detectron2 @ git+https://github.com/facebookresearch/detectron2.git",
     # other third party related dependencies (services or DL libraries). Must be installed by users
+    "jdeskew",
     "boto3",
     "pdfplumber>=0.7.1",
     "tensorflow-addons>=0.13.0",
@@ -132,7 +133,7 @@ def deps_list(*pkgs: str):
 
 
 # remaining dependencies to use models that neither require TF nor PyTorch
-additional_deps = deps_list("boto3", "pdfplumber", "fasttext")
+additional_deps = deps_list("boto3", "pdfplumber", "fasttext", "jdeskew")
 
 # Tensorflow dependencies
 tf_deps = deps_list("tensorpack", "protobuf", "tensorflow-addons", "python-doctr")
@@ -159,6 +160,7 @@ def deps_list(*pkgs: str):
     "lxml",
     "lxml-stubs",
     "pycocotools",
+    "jdeskew",
 )