Adding an image transformer and corresponding pipeline component by JaMe76 · Pull Request #82 · deepdoctection/deepdoctection · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Adding an image transformer and corresponding pipeline component #82

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Nov 3, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .github/workflows/style.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ on:

jobs:
lint:
if: "contains(github.event.head_commit.message, '[force ci]') || github.ref == 'refs/heads/master'"
runs-on: ${{ matrix.os }}
strategy:
matrix:
Expand Down Expand Up @@ -36,6 +37,7 @@ jobs:
run: make lint

black:
if: "contains(github.event.head_commit.message, '[force ci]') || github.ref == 'refs/heads/master'"
runs-on: ${{ matrix.os }}
strategy:
matrix:
Expand All @@ -56,6 +58,7 @@ jobs:
make black

isort:
if: "contains(github.event.head_commit.message, '[force ci]') || github.ref == 'refs/heads/master'"
runs-on: ${{ matrix.os }}
strategy:
matrix:
Expand All @@ -78,6 +81,7 @@ jobs:
if [ -n "$(git status --porcelain --untracked-files=no)" ]; then exit 1; else echo "All clear"; fi

mypy:
if: "contains(github.event.head_commit.message, '[force ci]') || github.ref == 'refs/heads/master'"
runs-on: ${{ matrix.os }}
strategy:
matrix:
Expand Down
4 changes: 4 additions & 0 deletions deepdoctection/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,10 @@
"LMTokenClassifier",
"LMSequenceClassifier",
"LanguageDetector",
"ImageTransformer",
"InferenceResize",
"D2FrcnnDetector",
"Jdeskewer",
"DoctrTextlineDetector",
"DoctrTextRecognizer",
"FasttextLangDetector",
Expand Down Expand Up @@ -209,6 +211,7 @@
"PipelineComponent",
"PredictorPipelineComponent",
"LanguageModelPipelineComponent",
"ImageTransformPipelineComponent",
"Pipeline",
"DetectResultGenerator",
"SubImageLayoutService",
Expand All @@ -227,6 +230,7 @@
"SegmentationResult",
"TextExtractionService",
"TextOrderService",
"SimpleTransformPipelineComponent",
],
"train": ["D2Trainer", "train_d2_faster_rcnn", "LayoutLMTrainer", "train_hf_layoutlm", "train_faster_rcnn"],
"utils": [
Expand Down
9 changes: 8 additions & 1 deletion deepdoctection/datapoint/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,11 +183,18 @@ def pdf_bytes(self, pdf_bytes: bytes) -> None:
if not hasattr(self, "_pdf_bytes"):
setattr(self, "_pdf_bytes", pdf_bytes)

def clear_image(self) -> None:
def clear_image(self, clear_bbox: bool = False) -> None:
    """
    Removes the :attr:`Image.image`. Useful, if the image must be a lightweight object.

    :param clear_bbox: If set to `True` it will also remove the image width and height (i.e. the bounding box)
                       as well as the self embedding entry. This is necessary, if the image is going to be
                       replaced with a transform.
    """
    self._image = None
    if clear_bbox:
        self._bbox = None
        # Tolerate a missing self embedding entry instead of raising a KeyError, so that the method can be
        # called repeatedly without failing.
        self.embeddings.pop(self.image_id, None)

def get_image(self) -> "_Img": # type: ignore
"""
Expand Down
1 change: 1 addition & 0 deletions deepdoctection/extern/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from ..utils.file_utils import tensorpack_available
from .base import *
from .d2detect import *
from .deskew import *
from .doctrocr import *
from .fastlang import *
from .hflayoutlm import *
Expand Down
16 changes: 16 additions & 0 deletions deepdoctection/extern/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,3 +376,19 @@ def possible_languages(self) -> List[ObjectTypes]:
Returns a list of possible detectable languages
"""
return list(self.categories.values())


class ImageTransformer(PredictorBase):
    """
    Abstract base class for predictors that transform an image. Implementations of :meth:`transform` receive an
    image as numpy array and return the transformed image as numpy array.
    """

    @abstractmethod
    def transform(self, np_img: ImageType) -> ImageType:
        """
        Abstract method transform

        :param np_img: image as numpy array
        :return: transformed image as numpy array
        """
        raise NotImplementedError

    def clone(self) -> PredictorBase:
        """
        Creates a new instance of the same class by calling its constructor without arguments. Subclasses whose
        constructor takes configuration have to override this method in order to carry the configuration over.
        """
        transformer_cls = type(self)
        return transformer_cls()
55 changes: 55 additions & 0 deletions deepdoctection/extern/deskew.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# -*- coding: utf-8 -*-
# File: deskew.py

# Copyright 2022 Dr. Janis Meyer. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
jdeskew estimator and rotator to deskew images: https://github.com/phamquiluan/jdeskew
"""

from typing import List

from ..utils.detection_types import ImageType, Requirement
from ..utils.file_utils import get_jdeskew_requirement, jdeskew_available
from .base import ImageTransformer

if jdeskew_available():
from jdeskew.estimator import get_angle
from jdeskew.utility import rotate


class Jdeskewer(ImageTransformer):
    """
    Deskew an image following https://phamquiluan.github.io/files/paper2.pdf . It allows to determine the deskew
    angle up to 45 degrees and provides the corresponding rotation so that text lines range horizontally.
    """

    def __init__(self, min_angle_rotation: float = 2.0):
        """
        :param min_angle_rotation: Minimum absolute value of the estimated skew angle (in degrees) that triggers a
                                   rotation. Images with a smaller estimated skew are returned unchanged.
        """
        self.name = "jdeskew_transform"
        self.min_angle_rotation = min_angle_rotation

    def transform(self, np_img: ImageType) -> ImageType:
        """
        Estimates the skew angle of the image and rotates the image, if the skew is large enough.

        :param np_img: image as numpy array
        :return: the rotated image or, if the estimated skew is within the threshold, the unchanged input
        """
        angle = get_angle(np_img)

        # The estimated angle is signed (the image can be skewed in either direction), hence the magnitude has to
        # be compared with the threshold. Otherwise negative skew would never be corrected.
        if abs(angle) > self.min_angle_rotation:
            return rotate(np_img, angle)
        return np_img

    def clone(self) -> "Jdeskewer":
        """
        Returns a new instance with the same `min_angle_rotation`. The base class implementation re-instantiates
        without arguments and would silently reset the threshold to its default.
        """
        return self.__class__(self.min_angle_rotation)

    @classmethod
    def get_requirements(cls) -> List[Requirement]:
        """
        Get a list of requirements for running the transformer
        """
        return [get_jdeskew_requirement()]
1 change: 1 addition & 0 deletions deepdoctection/pipe/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,4 @@
from .registry import *
from .segment import *
from .text import *
from .transform import *
25 changes: 24 additions & 1 deletion deepdoctection/pipe/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

from ..dataflow import DataFlow, MapData
from ..datapoint.image import Image
from ..extern.base import ObjectDetector, PdfMiner, TextRecognizer
from ..extern.base import ImageTransformer, ObjectDetector, PdfMiner, TextRecognizer
from ..mapper.laylmstruct import LayoutLMFeatures
from ..utils.context import timed_operation
from ..utils.detection_types import JsonDict
Expand Down
Expand Up @@ -189,6 +189,29 @@ def clone(self) -> "LanguageModelPipelineComponent":
raise NotImplementedError


class ImageTransformPipelineComponent(PipelineComponent, ABC):
    """
    Abstract base class for pipeline components that hold exactly one model for transforming images. Components of
    this type are meant to be placed at the beginning of a pipeline.
    """

    def __init__(self, name: str, transform_predictor: ImageTransformer):
        """
        :param name: Name of the component. Will be passed to the base class
        :param transform_predictor: An ImageTransformer that performs the image transformation
        """

        # The predictor is stored before the base class initializer runs; this order is kept on purpose.
        self.transform_predictor = transform_predictor
        super().__init__(name)

    @abstractmethod
    def clone(self) -> "ImageTransformPipelineComponent":
        """
        Clone an instance. Must be implemented by derived classes.
        """
        raise NotImplementedError


class Pipeline(ABC):
"""
Abstract base class for creating pipelines. Pipelines represent the framework with which documents can be processed
Expand Down
74 changes: 74 additions & 0 deletions deepdoctection/pipe/transform.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# -*- coding: utf-8 -*-
# File: transform.py

# Copyright 2022 Dr. Janis Meyer. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Module for transform style pipeline components. These pipeline components are used for various transforming operations
on images (e.g. deskew, de-noising or more general GAN like operations).
"""

from ..datapoint.image import Image
from ..extern.base import ImageTransformer
from ..utils.detection_types import JsonDict
from ..utils.logger import logger
from .base import ImageTransformPipelineComponent


class SimpleTransformService(ImageTransformPipelineComponent):
    """
    Pipeline component for transforming an image. The service is designed for applying transform predictors that
    take an image as numpy array as input and return the same. The service itself will change the underlying metadata
    like height and width of the returned transform.

    This component is meant to be used at the very first stage of a pipeline. If components have already returned image
    annotations then this component will currently not re-calculate bounding boxes in terms of the transformed image.
    It will raise a warning (at runtime) if image annotations have already been appended.
    """

    def __init__(self, transform_predictor: ImageTransformer):
        """
        :param transform_predictor: image transformer. Its name is used to derive the name of this component
        """
        super().__init__(self._get_name(transform_predictor.name), transform_predictor)

    def serve(self, dp: Image) -> None:
        """
        Applies the transform predictor to the image of the datapoint and replaces the image with the result.
        Datapoints without an image are left untouched.

        :param dp: datapoint to be processed
        """
        if dp.annotations:
            logger.warning(
                "%s has already received image with image annotations. These annotations will not "
                "be transformed and might cause unexpected output in your pipeline.",
                self.name,
            )
        if dp.image is None:
            return
        np_image_transform = self.transform_predictor.transform(dp.image)
        # Width, height and the self embedding are cleared as well (clear_bbox=True) before the transformed
        # image is assigned.
        self.dp_manager.datapoint.clear_image(True)
        self.dp_manager.datapoint.image = np_image_transform

    def clone(self) -> "SimpleTransformService":
        """
        Returns a new service of the same class that is built with the transform predictor of this instance.
        """
        return self.__class__(self.transform_predictor)

    def get_meta_annotation(self) -> JsonDict:
        """
        Returns the meta annotation of this component. All entries are empty, as the service does not add any
        annotations to the datapoint.
        """
        return {
            "image_annotations": [],
            "sub_categories": {},
            "relationships": {},
            "summaries": [],
        }

    @staticmethod
    def _get_name(transform_name: str) -> str:
        """
        Builds the component name from the name of the transform predictor.
        """
        return f"simple_transform_{transform_name}"


# The package level export list registers this component as "SimpleTransformPipelineComponent". The alias makes
# that name resolvable without changing the class name.
SimpleTransformPipelineComponent = SimpleTransformService
19 changes: 19 additions & 0 deletions deepdoctection/utils/file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,25 @@ def scipy_available() -> bool:
return bool(_SCIPY_AVAILABLE)


# jdeskew dependency
_JDESKEW_ERR_MSG = "jdeskew must be installed. >> pip install jdeskew"
_JDESKEW_AVAILABLE = importlib.util.find_spec("jdeskew") is not None


def jdeskew_available() -> bool:
    """
    Checks whether an import spec for the jdeskew package was found when this module was loaded.

    :return: `True` if jdeskew is installed, `False` otherwise
    """
    return bool(_JDESKEW_AVAILABLE)


def get_jdeskew_requirement() -> Requirement:
    """
    Returns the jdeskew requirement as a tuple of package name, availability flag and the error message to show
    when the package is missing.
    """
    is_installed = jdeskew_available()
    return "jdeskew", is_installed, _JDESKEW_ERR_MSG


# scikit-learn dependencies
_SKLEARN_AVAILABLE = importlib.util.find_spec("sklearn") is not None
_SKLEARN_ERR_MSG = "scikit-learn must be installed. >> pip install scikit-learn==1.0.2"
Expand Down
3 changes: 3 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,9 @@ ignore_missing_imports = True
[mypy-doctr.*]
ignore_missing_imports = True

[mypy-jdeskew.*]
ignore_missing_imports = True

[mypy-deepdoctection.extern.tp.*]
ignore_missing_imports = True
ignore_errors = True
Expand Down
4 changes: 3 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ def get_version():
"transformers",
"detectron2 @ git+https://github.com/facebookresearch/detectron2.git",
# other third party related dependencies (services or DL libraries). Must be installed by users
"jdeskew",
"boto3",
"pdfplumber>=0.7.1",
"tensorflow-addons>=0.13.0",
Expand Down Expand Up @@ -132,7 +133,7 @@ def deps_list(*pkgs: str):


# remaining dependencies to use models that neither require TF nor PyTorch
additional_deps = deps_list("boto3", "pdfplumber", "fasttext")
additional_deps = deps_list("boto3", "pdfplumber", "fasttext", "jdeskew")

# Tensorflow dependencies
tf_deps = deps_list("tensorpack", "protobuf", "tensorflow-addons", "python-doctr")
Expand All @@ -159,6 +160,7 @@ def deps_list(*pkgs: str):
"lxml",
"lxml-stubs",
"pycocotools",
"jdeskew",
)


Expand Down
Loading
0