Evaluator running over pipelines & TEDS by JaMe76 · Pull Request #38 · deepdoctection/deepdoctection

Evaluator running over pipelines & TEDS #38


Merged 31 commits on Jun 30, 2022.

Commits (31, all by JaMe76)
5f181d6  update evaluator [eval]  (Jun 22, 2022)
f57b219  let evaluator run on a pipeline [eval]  (Jun 22, 2022)
8f18706  start teds [eval]  (Jun 23, 2022)
cf68002  add dd_pipe_like arg for pubtabnet [dataset]  (Jun 23, 2022)
b91496a  provide meta data from predictors and components [pipe][extern]  (Jun 24, 2022)
8d88468  update meta annotations [pipe]  (Jun 24, 2022)
9a29815  update evaluator [eval]  (Jun 24, 2022)
332b0df  continue with teds [eval]  (Jun 28, 2022)
ef0e19b  update docs [docs]  (Jun 28, 2022)
96225ca  update evaluator [eval]  (Jun 22, 2022)
307cbde  let evaluator run on a pipeline [eval]  (Jun 22, 2022)
38b7787  start teds [eval]  (Jun 23, 2022)
d0cc38c  add dd_pipe_like arg for pubtabnet [dataset]  (Jun 23, 2022)
94d5616  provide meta data from predictors and components [pipe][extern]  (Jun 24, 2022)
447d6b3  update meta annotations [pipe]  (Jun 24, 2022)
924dd70  update evaluator [eval]  (Jun 24, 2022)
c641a08  continue with teds [eval]  (Jun 28, 2022)
43a8090  update docs [docs]  (Jun 28, 2022)
275c7f7  update -gitignore [gitignore]  (Jun 29, 2022)
3fa34ab  update eval testing [tests]  (Jun 29, 2022)
4f4ad88  fix lint issues [lint]  (Jun 29, 2022)
57dfba6  fix mypy [mypy]  (Jun 29, 2022)
7b115c8  fix test issues [tests]  (Jun 29, 2022)
a1691c1  Merge remote-tracking branch 'origin/eval' into eval  (Jun 29, 2022)
b6c1324  fix test issues [test]  (Jun 29, 2022)
62c61a6  fix test issues [test]  (Jun 29, 2022)
a11d8fa  fix test issues [test]  (Jun 29, 2022)
8e1aa93  fix test issues [test]  (Jun 29, 2022)
90abea8  fix test issues [test]  (Jun 29, 2022)
73192b0  fix test issues [test]  (Jun 29, 2022)
d5c694c  update docs [docs]  (Jun 30, 2022)

(The first nine commit messages appear twice with different hashes, apparently an artifact of merging 'origin/eval' back into 'eval' in commit a1691c1.)
1 change: 1 addition & 0 deletions .gitignore

@@ -17,3 +17,4 @@ snippets.py
 =3.0.0
 /notebooks/own.ipynb
 /notebooks/layout_parsing_structure.ipynb
+/snipptes.py
11 changes: 10 additions & 1 deletion README.md
@@ -115,7 +115,16 @@ Alternatively, consult the
 
 ### Installation from source
 
-Download the repository or clone via
+In a virtual environment, run for PyTorch:
+
+```
+pip install git+https://github.com/deepdoctection/deepdoctection#egg=deepdoctection[source-pt]
+```
+
+This will install the package without cloning the whole repo. Do a similar command with `source-tf` to
+install the package for Tensorflow.
+
+Otherwise, download the repository or clone via
 
 ```
 git clone https://github.com/deepdoctection/deepdoctection.git
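Per the README's note about `source-tf`, the analogous TensorFlow install command would presumably be:

```
pip install git+https://github.com/deepdoctection/deepdoctection#egg=deepdoctection[source-tf]
```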
2 changes: 2 additions & 0 deletions deepdoctection/dataflow/__init__.py

@@ -20,8 +20,10 @@
     MapData,
     MapDataComponent,
     MultiProcessMapData,
+    MultiThreadMapData,
     ProxyDataFlow,
     RepeatedData,
+    MultiThreadMapData,
 )
 
 from .common import *
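The newly re-exported `MultiThreadMapData` follows the tensorpack-style dataflow API; a hedged sketch of how it would be used (the dataflow `df` and the mapped function `load_page` are illustrative placeholders, not from this PR):

```
from deepdoctection.dataflow import MultiThreadMapData

def load_page(dp):
    # placeholder for an I/O-bound mapping, e.g. reading an image from disk
    return dp

# Four threads consume an existing dataflow `df` and apply the map in parallel.
df = MultiThreadMapData(df, num_thread=4, map_func=load_page)
```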
3 changes: 2 additions & 1 deletion deepdoctection/datapoint/annotation.py

@@ -237,7 +237,8 @@ def remove_sub_category(self, key: str) -> None:
         :param key: A key to a sub category.
         """
 
-        self.sub_categories.pop(key)
+        if key in self.sub_categories:
+            self.sub_categories.pop(key)
 
     def dump_relationship(self, key: str, annotation_id: str) -> None:
         """
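This makes removal idempotent; a minimal sketch, assuming `ann` is an annotation object from this module and `"row_number"` a hypothetical sub category key:

```
# Sketch only; `ann` and the key "row_number" are illustrative assumptions.
ann.remove_sub_category("row_number")  # removes the sub category
ann.remove_sub_category("row_number")  # now a no-op instead of raising KeyError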
2 changes: 2 additions & 0 deletions deepdoctection/datasets/instances/fintabnet.py

@@ -189,6 +189,8 @@ def _map_filename(dp: JsonDict, workdir: Path) -> JsonDict:
             load_image,
             fake_score=fake_score,
             rows_and_cols=rows_and_cols,
+            dd_pipe_like=False,
+            is_fintabnet=True,
         )
         if use_multi_proc:
             df = MultiProcessMapData(
2 changes: 1 addition & 1 deletion deepdoctection/datasets/instances/iiitar13k.py

@@ -54,7 +54,7 @@
 from ..registry import dataset_registry
 
 if lxml_available():
-    from lxml import etree  # type: ignore
+    from lxml import etree
 
 _NAME = "iiitar13k"
 
2 changes: 1 addition & 1 deletion deepdoctection/datasets/instances/pubtables1m.py

@@ -53,7 +53,7 @@
 from ..registry import dataset_registry
 
 if lxml_available():
-    from lxml import etree  # type: ignore
+    from lxml import etree
 
 _NAME = "pubtables1m"
 
12 changes: 10 additions & 2 deletions deepdoctection/datasets/instances/pubtabnet.py

@@ -36,7 +36,7 @@
 from ...mapper.cats import cat_to_sub_cat, filter_cat
 from ...mapper.pubstruct import pub_to_image
 from ...utils.detection_types import JsonDict
-from ...utils.logger import log_once
+from ...utils.logger import log_once, logger
 from ...utils.settings import names
 from ..base import _BuiltInDataset
 from ..dataflow_builder import DataFlowBaseBuilder
@@ -73,7 +73,7 @@
 _LOCATION = "pubtabnet"
 _ANNOTATION_FILES: Mapping[str, str] = {"all": "PubTabNet_2.0.0.jsonl"}
 
-_INIT_CATEGORIES = [names.C.CELL, names.C.ITEM]
+_INIT_CATEGORIES = [names.C.CELL, names.C.ITEM, names.C.TAB, names.C.WORD]
 _SUB_CATEGORIES: Dict[str, Dict[str, List[str]]]
 _SUB_CATEGORIES = {
     names.C.ITEM: {"row_col": [names.C.ROW, names.C.COL]},
@@ -86,6 +86,8 @@
     },
     names.C.HEAD: {names.C.RN: [], names.C.CN: [], names.C.RS: [], names.C.CS: []},
     names.C.BODY: {names.C.RN: [], names.C.CN: [], names.C.RS: [], names.C.CS: []},
+    names.C.TAB: {names.C.HTAB: [names.C.HTAB]},
+    names.C.WORD: {names.C.CHARS: [names.C.CHARS]},
 }
 
 
@@ -139,6 +141,10 @@ def build(self, **kwargs: Union[str, int]) -> DataFlow:
         load_image = kwargs.get("load_image", False)
         rows_and_cols = kwargs.get("rows_and_cols", False)
         fake_score = kwargs.get("fake_score", False)
+        dd_pipe_like = kwargs.get("dd_pipe_like", False)
+        if dd_pipe_like:
+            logger.info("When 'dd_pipe_like' is set to True, 'load_image' will be reset to True")
+            load_image = True
 
         # Load
         dataset_split = self.annotation_files["all"]
@@ -158,6 +164,8 @@ def replace_filename(dp: JsonDict) -> JsonDict:
             load_image,
             fake_score=fake_score,
             rows_and_cols=rows_and_cols,
+            dd_pipe_like=dd_pipe_like,
+            is_fintabnet=False,
         )
 
         df = MapData(df, pub_mapper)
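A hedged usage sketch of the new `dd_pipe_like` build option, assuming the usual registry access via `get_dataset` from the deepdoctection dataset API:

```
from deepdoctection.datasets import get_dataset

pubtabnet = get_dataset("pubtabnet")
# dd_pipe_like=True shapes the ground truth like deepdoctection pipeline
# output and, as the change above shows, forces load_image=True.
df = pubtabnet.dataflow.build(dd_pipe_like=True)
```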
4 changes: 4 additions & 0 deletions deepdoctection/eval/__init__.py

@@ -20,8 +20,12 @@
 for training.
 """
 
+from ..utils.file_utils import apted_available
 from .accmetric import *
 from .base import *
 from .cocometric import *
 from .eval import *
 from .registry import *
+
+if apted_available():
+    from .tedsmetric import *
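Since `tedsmetric` is only exported when the `apted` tree-edit-distance package is installed, downstream code can guard on the same helper; a sketch, where the class name `TedsMetric` is an assumption about what the module exports:

```
from deepdoctection.utils.file_utils import apted_available

if apted_available():
    from deepdoctection.eval import TedsMetric  # export name assumed
else:
    TedsMetric = None  # skip TEDS evaluation when apted is missing
```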
23 changes: 14 additions & 9 deletions deepdoctection/eval/accmetric.py

@@ -53,9 +53,9 @@ def accuracy(label_gt: List[int], label_predictions: List[int], masks: Optional[
     :return: Accuracy score with only unmasked values to be considered
     """
 
-    np_label_gt, np_label_predictions = np.asarray(label_gt), np.asarray(label_predictions)
+    np_label_gt, np_label_pr = np.asarray(label_gt), np.asarray(label_predictions)
     assert len(np_label_gt) == len(
-        np_label_predictions
+        np_label_pr
     ), f"length of label_gt ({len(np_label_gt)}) and label_predictions ({len(np_label_pr)}) must be equal"
 
     if masks is not None:
@@ -65,8 +65,8 @@ def accuracy(label_gt: List[int], label_predictions: List[int], masks: Optional[
         np_masks = np.asarray(masks)
         np_masks.astype(bool)
         np_label_gt = np_label_gt[np_masks]
-        np_label_predictions = np_label_predictions[np_masks]
-    return np.array(accuracy_score(np_label_gt, np_label_predictions), dtype=float32)
+        np_label_pr = np_label_pr[np_masks]
+    return np.array(accuracy_score(np_label_gt, np_label_pr), dtype=float32)
 
 
 @metric_registry.register("accuracy")
@@ -111,11 +111,11 @@ def get_distance(
         cls, dataflow_gt: DataFlow, dataflow_predictions: DataFlow, categories: DatasetCategories
     ) -> List[JsonDict]:
 
-        labels_gt, labels_predictions = cls.dump(dataflow_gt, dataflow_predictions, categories)
+        labels_gt, labels_pr = cls.dump(dataflow_gt, dataflow_predictions, categories)
 
         results = []
         for key in labels_gt:  # pylint: disable=C0206
-            res = cls.metric(labels_gt[key], labels_predictions[key])
+            res = cls.metric(labels_gt[key], labels_pr[key])
             results.append({"key": key, "val": res, "num_samples": len(labels_gt[key])})
         return results
@@ -168,6 +168,11 @@ def _category_sanity_checks(cls, categories: DatasetCategories) -> None:
     def get_requirements(cls) -> List[Requirement]:
         return [get_sklearn_requirement()]
 
+    @property
+    def sub_cats(self) -> Optional[Union[Dict[str, str], Dict[str, List[str]]]]:
+        """Sub categories the metric runs over."""
+        return self._sub_cats
+
 
 def confusion(label_gt: List[int], label_predictions: List[int], masks: Optional[List[int]] = None) -> NDArray[float32]:
     """
@@ -181,15 +186,15 @@ def confusion(label_gt: List[int], label_predictions: List[int], masks: Optional[
     :return: numpy array
     """
 
-    np_label_gt, np_label_predictions = np.asarray(label_gt), np.asarray(label_predictions)
+    np_label_gt, np_label_pr = np.asarray(label_gt), np.asarray(label_predictions)
 
     if masks is not None:
         np_masks = np.asarray(masks)
         np_masks.astype(bool)
         np_label_gt = np_label_gt[np_masks]
-        np_label_predictions = np_label_predictions[np_masks]
+        np_label_pr = np_label_pr[np_masks]
 
-    return confusion_matrix(np_label_gt, np_label_predictions)
+    return confusion_matrix(np_label_gt, np_label_pr)
 
 
 @metric_registry.register("confusion")
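A small worked example of the masked `accuracy` above. Values are illustrative; the masks are passed as booleans because, as written, `np_masks.astype(bool)` does not reassign `np_masks`, so integer masks would be applied as fancy indices rather than as a boolean filter:

```
from deepdoctection.eval.accmetric import accuracy

gt = [1, 2, 2, 1]
pred = [1, 2, 1, 1]

print(accuracy(gt, pred))                                   # 0.75, one mismatch
print(accuracy(gt, pred, masks=[True, True, False, True]))  # 1.0, mismatch masked out
```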
6 changes: 5 additions & 1 deletion deepdoctection/eval/cocometric.py

@@ -26,6 +26,7 @@
 from ..dataflow import DataFlow
 from ..datasets.info import DatasetCategories
+from ..mapper.cats import re_assign_cat_ids
 from ..mapper.cocostruct import image_to_coco
 from ..utils.detection_types import JsonDict
 from ..utils.file_utils import Requirement, cocotools_available, get_cocotools_requirement
@@ -126,14 +127,17 @@ class CocoMetric(MetricBase):
     def dump(
         cls, dataflow_gt: DataFlow, dataflow_predictions: DataFlow, categories: DatasetCategories
     ) -> Tuple["COCO", "COCO"]:
-        cats = [{"id": int(k), "name": v} for k, v in categories.get_categories(as_dict=True).items()]
+        cats = [{"id": int(k), "name": v} for k, v in categories.get_categories(as_dict=True, filtered=True).items()]
         imgs_gt, imgs_pr = [], []
         anns_gt, anns_pr = [], []
 
         dataflow_gt.reset_state(), dataflow_predictions.reset_state()  # pylint: disable=W0106
 
         for dp_gt, dp_pred in zip(dataflow_gt, dataflow_predictions):
             img_gt, ann_gt = cls.mapper(dp_gt)  # type: ignore
+            dp_pred = re_assign_cat_ids(categories.get_categories(as_dict=True, filtered=True, name_as_key=True))(  # pylint: disable=E1120
+                dp_pred
+            )
             img_pr, ann_pr = cls.mapper(dp_pred)  # type: ignore
             imgs_gt.append(img_gt)
             imgs_pr.append(img_pr)
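Why the re-assignment is needed: with `filtered=True`, category ids are re-indexed, so predictions still carrying ids from the unfiltered category set must be re-mapped by category name. A simplified sketch of the idea (not the real `re_assign_cat_ids` implementation; accessor names are assumptions):

```
# Hypothetical, stripped-down illustration of re_assign_cat_ids.
def re_assign_cat_ids_sketch(name_to_filtered_id):
    def _map(dp):
        for ann in dp.get_annotation():  # assumed Image accessor
            ann.category_id = name_to_filtered_id[ann.category_name]
        return dp
    return _map
```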