[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Mdutils

laia.utils.mdutils

Attributes

METRIC_COLUMN module-attribute

METRIC_COLUMN = 'Metric'

Classes

Statistics

Statistics(filename: str)
Source code in laia/utils/mdutils.py
80
81
def __init__(self, filename: str) -> None:
    """
    Create the Markdown document that the statistics are added to.

    Args:
        filename: Forwarded to ``MdUtils`` as the document's ``file_name``.
    """
    markdown_document = MdUtils(title="Statistics", file_name=filename)
    self.document = markdown_document
Attributes
HEADERS class-attribute instance-attribute
# Section titles handed to `_write_section`, keyed by statistics category.
HEADERS = {
    "Images": "Images statistics",
    "Labels": "Labels statistics",
    "Chars": "Characters statistics",
}
document instance-attribute
document = MdUtils(file_name=filename, title='Statistics')
Functions
create_image_statistics
create_image_statistics(
    widths: List[int], heights: List[int]
)

Compute statistics on image sizes and write them to file.

Source code in laia/utils/mdutils.py
 98
 99
100
101
102
103
104
105
106
107
def create_image_statistics(self, widths: List[int], heights: List[int]):
    """
    Summarise image widths and heights in a table and write it as the
    images section.

    The table includes a count row and omits the total row.
    """
    size_columns = {"Width": widths, "Height": heights}
    size_table = create_table(data=size_columns, count=True, total=False)
    self._write_section(table=size_table, title=Statistics.HEADERS["Images"])
create_label_statistics
create_label_statistics(
    labels: List[str], delimiters: List[str]
)

Compute statistics on text labels and write them to file.

Source code in laia/utils/mdutils.py
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
def create_label_statistics(self, labels: List[str], delimiters: List[str]):
    """
    Summarise the length of each label (in characters and in words), write
    the result as the labels section, then write the character occurrences.
    """
    occurrences = Counter()
    # A defaultdict keeps the table free of columns when `labels` is empty.
    lengths = defaultdict(list)

    for label in labels:
        occurrences.update(label)
        lengths["Chars"].append(len(label))
        words = char_to_word_seq("".join(label), delimiters)
        lengths["Words"].append(len(words))

    label_table = create_table(data=lengths)
    self._write_section(table=label_table, title=Statistics.HEADERS["Labels"])

    # The per-character counts gathered above feed the next section.
    self.create_character_occurrences_statistics(occurrences)
create_character_occurrences_statistics
create_character_occurrences_statistics(
    char_counter: Counter,
)

Compute statistics on the character distribution and write them to file.

Source code in laia/utils/mdutils.py
128
129
130
131
132
133
134
135
136
137
138
139
140
141
def create_character_occurrences_statistics(self, char_counter: Counter):
    """
    List every character with its number of occurrences, most frequent
    first, and write the table as a level-3 section.
    """
    table = PrettyTable(field_names=["Character", "Occurrence"])
    # Characters are left-aligned, counts right-aligned.
    table.align.update({"Character": "l", "Occurrence": "r"})
    table.set_style(MARKDOWN)

    ranked = char_counter.most_common()
    table.add_rows(ranked)

    self._write_section(
        table=table,
        title=Statistics.HEADERS["Chars"],
        level=3,
    )
create_split_section
create_split_section(
    split, widths, heights, labels, delimiters
)
Source code in laia/utils/mdutils.py
143
144
145
146
147
def create_split_section(self, split, widths, heights, labels, delimiters):
    """
    Add a level-1 header named after the capitalized split, followed by the
    image statistics and the label statistics for that split.
    """
    heading = split.capitalize()
    self.document.new_header(level=1, title=heading)

    self.create_image_statistics(heights=heights, widths=widths)
    self.create_label_statistics(delimiters=delimiters, labels=labels)

Functions

create_table

create_table(
    data: Dict[str, List[Union[int, float]]],
    count: bool = False,
    total: bool = True,
) -> PrettyTable

Generate a PrettyTable object from an input dictionary. By default, the min, max, mean, median and total of each column are computed. The total row can be disabled, and a count (length) row can be enabled.

Parameters:

Name Type Description Default
data Dict[str, List[Union[int, float]]]

Data to display. Each key will be made into a column.

required
count bool

Whether to display an additional row for data count.

False
total bool

Whether to display an additional row for data total sum.

True

Returns:

Name Type Description
PrettyTable PrettyTable

A Markdown table

Source code in laia/utils/mdutils.py
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
def create_table(
    data: Dict[str, List[Union[int, float]]],
    count: bool = False,
    total: bool = True,
) -> PrettyTable:
    """
    Build a Markdown-styled PrettyTable summarising each column of `data`.

    One row is produced per metric: min, max, mean and median are always
    present, an optional count row comes first and an optional total row
    comes last.

    Args:
        data: Data to display. Each key will be made into a column.
        count: Whether to display an additional row for data count.
        total: Whether to display an additional row for data total sum.

    Returns:
        PrettyTable: A Markdown table
    """
    # Each entry is (row label, aggregation function, ndigits given to round).
    metrics = [("Count", len, None)] if count else []
    metrics += [
        ("Min", np.min, None),
        ("Max", np.max, None),
        ("Mean", np.mean, 2),
        ("Median", np.median, None),
    ]
    if total:
        metrics.append(("Total", np.sum, None))

    table = PrettyTable(field_names=[METRIC_COLUMN, *data.keys()])
    table.align.update({METRIC_COLUMN: "l"})
    table.set_style(MARKDOWN)

    rows = []
    for label, aggregate, ndigits in metrics:
        # Aggregate every column, then round the result if needed.
        cells = [
            round(aggregate(column), ndigits=ndigits) for column in data.values()
        ]
        rows.append([label, *cells])
    table.add_rows(rows)

    return table