eval

Evaluation

Class for evaluating a blocking result against a gold standard.

Parameters:

    Name            Type                 Description                                    Default
    blocks          KlinkerBlockManager  Blocking result                                required
    gold            DataFrame            Gold standard pairs as a two-column dataframe  required
    left_data_len   int                  Number of entities in the left dataset         required
    right_data_len  int                  Number of entities in the right dataset        required
Source code in klinker/eval.py
class Evaluation:
    """Class used for evaluation.

    Args:
        blocks: KlinkerBlockManager: Blocking result
        gold: pd.DataFrame: Gold standard pairs as two-column dataframe
        left_data_len: int: number of entities in left dataset
        right_data_len: int: number of entities in right dataset
    """

    def __init__(
        self,
        blocks: KlinkerBlockManager,
        gold: pd.DataFrame,
        left_data_len: int,
        right_data_len: int,
    ):
        self._check_consistency(blocks, gold)

        left_col = gold.columns[0]
        right_col = gold.columns[1]

        self.gold_pair_set = set(zip(gold[left_col], gold[right_col]))
        self._calc_tp_fp_fn(blocks)

        self.comp_without_blocking = left_data_len * right_data_len
        self.mean_block_size = blocks.mean_block_size

    def _calc_tp_fp_fn(self, blocks: KlinkerBlockManager):
        tp_pairs = set()
        fp = 0
        pair_number = 0  # stays 0 if the blocking produced no candidate pairs
        for pair_number, pair in enumerate(blocks.all_pairs(), start=1):
            if pair in self.gold_pair_set:
                tp_pairs.add(pair)
            else:
                fp += 1
        self.tp_set = tp_pairs
        self.fn_set = self.gold_pair_set - self.tp_set  # type: ignore
        self.false_negative = len(self.fn_set)
        self.true_positive = len(self.tp_set)
        self.false_positive = fp
        self.comp_with_blocking = pair_number

    def _check_consistency(self, blocks: KlinkerBlockManager, gold: pd.DataFrame):
        if not len(gold.columns) == 2:
            raise ValueError("Only binary matching supported!")
        if not set(blocks.blocks.columns) == set(gold.columns):
            raise ValueError(
                "Blocks and gold standard frame need to have the same columns!"
            )

    @classmethod
    def from_dataset(
        cls,
        blocks: KlinkerBlockManager,
        dataset: KlinkerDataset,
    ) -> "Evaluation":
        """Helper function to initialise evaluation with dataset.

        Args:
          blocks: KlinkerBlockManager: Calculated blocks
          dataset: KlinkerDataset: Dataset that was used for blocking

        Returns:
            Evaluation instance

        Examples:

            >>> # doctest: +SKIP
            >>> from sylloge import MovieGraphBenchmark
            >>> from klinker.data import KlinkerDataset
            >>> ds = KlinkerDataset.from_sylloge(MovieGraphBenchmark(), clean=True)
            >>> from klinker.blockers import TokenBlocker
            >>> blocks = TokenBlocker().assign(left=ds.left, right=ds.right)
            >>> from klinker.eval import Evaluation
            >>> ev = Evaluation.from_dataset(blocks, ds)
            >>> ev.to_dict()
            {'recall': 0.993933265925177, 'precision': 0.002804877004859314, 'f_measure': 0.005593967847488974, 'reduction_ratio': 0.9985747694185365, 'h3r': 0.9962486115318822, 'mean_block_size': 10.160596863935256}

        """
        return cls(
            blocks=blocks,
            gold=dataset.gold,
            left_data_len=len(dataset.left),
            right_data_len=len(dataset.right),
        )

    @property
    def recall(self) -> float:
        return self.true_positive / (self.true_positive + self.false_negative)

    @property
    def precision(self) -> float:
        return self.true_positive / (self.true_positive + self.false_positive)

    @property
    def f_measure(self) -> float:
        rec = self.recall
        prec = self.precision
        return harmonic_mean(a=rec, b=prec)

    @property
    def reduction_ratio(self) -> float:
        return 1 - (self.comp_with_blocking / self.comp_without_blocking)

    @property
    def h3r(self) -> float:
        rr = self.reduction_ratio
        rec = self.recall
        return harmonic_mean(a=rr, b=rec)

    def __repr__(self) -> str:
        return f"Evaluation: {self.to_dict()}"

    def to_dict(self) -> Dict[str, float]:
        return {
            "recall": self.recall,
            "precision": self.precision,
            "f_measure": self.f_measure,
            "reduction_ratio": self.reduction_ratio,
            "h3r": self.h3r,
            "mean_block_size": self.mean_block_size,
        }
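
For reference, the metrics exposed by to_dict follow the standard blocking-evaluation definitions visible in the properties above: recall and precision over the gold pairs, F-measure as their harmonic mean, the reduction ratio of candidate comparisons, and h3r as the harmonic mean of reduction ratio and recall. A minimal sketch with hypothetical counts:

# Toy illustration of the metric definitions used by Evaluation
# (all counts here are hypothetical).
tp, fp, fn = 90, 10, 10                        # pair counts after blocking
comp_with, comp_without = 10_000, 1_000_000    # comparisons with/without blocking

recall = tp / (tp + fn)                             # 0.9
precision = tp / (tp + fp)                          # 0.9
f_measure = 2 * recall * precision / (recall + precision)
reduction_ratio = 1 - comp_with / comp_without      # 0.99
h3r = 2 * reduction_ratio * recall / (reduction_ratio + recall)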

from_dataset(blocks, dataset) classmethod

Helper function to initialise evaluation with dataset.

Parameters:

    Name     Type                 Description                         Default
    blocks   KlinkerBlockManager  Calculated blocks                   required
    dataset  KlinkerDataset       Dataset that was used for blocking  required

Returns:

    Type        Description
    Evaluation  Evaluation instance

Examples:

>>> # doctest: +SKIP
>>> from sylloge import MovieGraphBenchmark
>>> from klinker.data import KlinkerDataset
>>> ds = KlinkerDataset.from_sylloge(MovieGraphBenchmark(), clean=True)
>>> from klinker.blockers import TokenBlocker
>>> blocks = TokenBlocker().assign(left=ds.left, right=ds.right)
>>> from klinker.eval import Evaluation
>>> ev = Evaluation.from_dataset(blocks, ds)
>>> ev.to_dict()
{'recall': 0.993933265925177, 'precision': 0.002804877004859314, 'f_measure': 0.005593967847488974, 'reduction_ratio': 0.9985747694185365, 'h3r': 0.9962486115318822, 'mean_block_size': 10.160596863935256}
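
Note that in this example recall is close to 1 while precision is very low, which is expected for a coarse token blocker: blocking is meant to cheaply prune the candidate space, which is why h3r combines the near-complete recall with the reduction ratio rather than with precision.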
Source code in klinker/eval.py
@classmethod
def from_dataset(
    cls,
    blocks: KlinkerBlockManager,
    dataset: KlinkerDataset,
) -> "Evaluation":
    """Helper function to initialise evaluation with dataset.

    Args:
      blocks: KlinkerBlockManager: Calculated blocks
      dataset: KlinkerDataset: Dataset that was used for blocking

    Returns:
        Evaluation instance

    Examples:

        >>> # doctest: +SKIP
        >>> from sylloge import MovieGraphBenchmark
        >>> from klinker.data import KlinkerDataset
        >>> ds = KlinkerDataset.from_sylloge(MovieGraphBenchmark(), clean=True)
        >>> from klinker.blockers import TokenBlocker
        >>> blocks = TokenBlocker().assign(left=ds.left, right=ds.right)
        >>> from klinker.eval import Evaluation
        >>> ev = Evaluation.from_dataset(blocks, ds)
        >>> ev.to_dict()
        {'recall': 0.993933265925177, 'precision': 0.002804877004859314, 'f_measure': 0.005593967847488974, 'reduction_ratio': 0.9985747694185365, 'h3r': 0.9962486115318822, 'mean_block_size': 10.160596863935256}

    """
    return cls(
        blocks=blocks,
        gold=dataset.gold,
        left_data_len=len(dataset.left),
        right_data_len=len(dataset.right),
    )

compare_blocks(blocks_a, blocks_b, dataset, improvement_metric='h3r')

Compare two blocking results by evaluating each of them and their combination.

Parameters:

    Name                Type                 Description                                    Default
    blocks_a            KlinkerBlockManager  One blocking result                            required
    blocks_b            KlinkerBlockManager  The other blocking result                      required
    dataset             KlinkerDataset       Dataset from which the blocks were calculated  required
    improvement_metric  str                  Metric used to calculate the improvement       'h3r'

Returns:

    Type  Description
    Dict  Dictionary with improvement metrics.

Source code in klinker/eval.py
def compare_blocks(
    blocks_a: KlinkerBlockManager,
    blocks_b: KlinkerBlockManager,
    dataset: KlinkerDataset,
    improvement_metric: str = "h3r",
) -> Dict:
    """Compare similarity between blocks using calculated eval.

    Args:
      blocks_a: KlinkerBlockManager: one blocking result
      blocks_b: KlinkerBlockManager: other blocking result
      dataset: KlinkerDataset: dataset from which blocks where calculated
      improvement_metric: str: used to calculate improvement

    Returns:
        Dictionary with improvement metrics.
    """
    eval_a = Evaluation.from_dataset(blocks=blocks_a, dataset=dataset)
    eval_b = Evaluation.from_dataset(blocks=blocks_b, dataset=dataset)
    return compare_blocks_from_eval(
        blocks_a=blocks_a,
        blocks_b=blocks_b,
        eval_a=eval_a,
        eval_b=eval_b,
        dataset=dataset,
        improvement_metric=improvement_metric,
    )
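
A usage sketch in the style of the example above; the second blocking result is left as a placeholder, since any further KlinkerBlockManager works:

>>> # doctest: +SKIP
>>> from sylloge import MovieGraphBenchmark
>>> from klinker.data import KlinkerDataset
>>> from klinker.blockers import TokenBlocker
>>> from klinker.eval import compare_blocks
>>> ds = KlinkerDataset.from_sylloge(MovieGraphBenchmark(), clean=True)
>>> blocks_a = TokenBlocker().assign(left=ds.left, right=ds.right)
>>> blocks_b = ...  # a second KlinkerBlockManager, e.g. from a different blocker
>>> comparison = compare_blocks(blocks_a, blocks_b, ds, improvement_metric="h3r")
>>> sorted(comparison.keys())
['dice_tp', 'eval_a', 'eval_b', 'eval_both', 'improvement_a', 'improvement_b']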

compare_blocks_from_eval(blocks_a, blocks_b, eval_a, eval_b, dataset, improvement_metric='h3r')

Compare two blocking results using already calculated evaluations.

Parameters:

    Name                Type                 Description                                    Default
    blocks_a            KlinkerBlockManager  One blocking result                            required
    blocks_b            KlinkerBlockManager  The other blocking result                      required
    eval_a              Evaluation           Evaluation of blocks_a                         required
    eval_b              Evaluation           Evaluation of blocks_b                         required
    dataset             KlinkerDataset       Dataset from which the blocks were calculated  required
    improvement_metric  str                  Metric used to calculate the improvement       'h3r'

Returns:

    Type  Description
    Dict  Dictionary with improvement metrics.

Source code in klinker/eval.py
def compare_blocks_from_eval(
    blocks_a: KlinkerBlockManager,
    blocks_b: KlinkerBlockManager,
    eval_a: Evaluation,
    eval_b: Evaluation,
    dataset: KlinkerDataset,
    improvement_metric: str = "h3r",
) -> Dict:
    """Compare similarity between blocks using calculated eval.

    Args:
      blocks_a: KlinkerBlockManager: one blocking result
      blocks_b: KlinkerBlockManager: other blocking result
      eval_a: Evaluation: eval of a
      eval_b: Evaluation: eval of b
      dataset: KlinkerDataset: dataset from which blocks where calculated
      improvement_metric: str: used to calculate improvement

    Returns:
        Dictionary with improvement metrics.
    """

    def percent_improvement(new: float, old: float):
        return (new - old) / old

    blocks_both = KlinkerBlockManager.combine(blocks_a, blocks_b)
    dice_tp = dice(eval_a.tp_set, eval_b.tp_set)

    eval_both = Evaluation.from_dataset(blocks=blocks_both, dataset=dataset)
    eval_both_metric = eval_both.to_dict()[improvement_metric]
    improvement_a = percent_improvement(
        eval_both_metric, eval_a.to_dict()[improvement_metric]
    )
    improvement_b = percent_improvement(
        eval_both_metric, eval_b.to_dict()[improvement_metric]
    )
    return {
        "eval_a": eval_a,
        "eval_b": eval_b,
        "dice_tp": dice_tp,
        "eval_both": eval_both,
        "improvement_a": improvement_a,
        "improvement_b": improvement_b,
    }
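
The improvement values are the relative gain of the combined blocking over each individual result under the chosen metric, i.e. percent_improvement(new, old) = (new - old) / old. A toy calculation (numbers hypothetical):

h3r_a, h3r_both = 0.90, 0.96
improvement_a = (h3r_both - h3r_a) / h3r_a   # ~0.067, the combination is about 6.7% better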

dice(a, b)

Calculate the Sørensen-Dice coefficient.

Source code in klinker/eval.py
def dice(a: Set, b: Set) -> float:
    """Calculate the Sørensen-Dice coefficient."""
    if len(a) + len(b) == 0:
        # both sets are empty; return 0 to avoid division by zero
        return 0.0
    return (2 * len(a.intersection(b))) / (len(a) + len(b))
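
For example, two true-positive sets that share two of their three pairs have a coefficient of 2 * 2 / (3 + 3):

>>> dice({(1, 1), (2, 2), (3, 3)}, {(2, 2), (3, 3), (4, 4)})
0.6666666666666666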

harmonic_mean(a, b)

Calculate harmonic mean between a and b.

Source code in klinker/eval.py
def harmonic_mean(a: float, b: float) -> float:
    """Calculate harmonic mean between a and b."""
    if a + b == 0:
        return 0
    return 2 * ((a * b) / (a + b))
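
The harmonic mean is dominated by the smaller of its two inputs, which is why f_measure and h3r are only high when both of their components are high:

>>> harmonic_mean(1.0, 0.5)
0.6666666666666666
>>> harmonic_mean(1.0, 0.0)
0.0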

multiple_block_comparison(blocks, dataset, improvement_metric='h3r')

Compare multiple blocking strategies.

Parameters:

    Name                Type                            Description                               Default
    blocks              Dict[str, KlinkerBlockManager]  Blocking results keyed by strategy name   required
    dataset             KlinkerDataset                  Dataset that was used for blocking        required
    improvement_metric  str                             Metric used for calculating improvement   'h3r'

Returns:

    Type       Description
    DataFrame  DataFrame with improvement values.

Source code in klinker/eval.py
def multiple_block_comparison(
    blocks: Dict[str, KlinkerBlockManager],
    dataset: KlinkerDataset,
    improvement_metric: str = "h3r",
) -> pd.DataFrame:
    """Compare multiple blocking strategies.

    Args:
      blocks: Dict[str, KlinkerBlockManager]: Blocking results
      dataset: KlinkerDataset: Dataset that was used for blocking
      improvement_metric: str: Metric used for calculating improvement

    Returns:
        DataFrame with improvement values.
    """
    blocks_with_eval = OrderedDict(
        {
            name: (
                blk,
                Evaluation.from_dataset(blocks=blk, dataset=dataset),
            )
            for name, blk in blocks.items()
        }
    )
    result = []
    seen_pairs = set()
    for b_a_name, (blocks_a, eval_a) in blocks_with_eval.items():
        for b_b_name, (blocks_b, eval_b) in blocks_with_eval.items():
            if (
                b_a_name != b_b_name
                and (b_a_name, b_b_name) not in seen_pairs
                and (b_b_name, b_a_name) not in seen_pairs
            ):
                comparison = compare_blocks_from_eval(
                    blocks_a, blocks_b, eval_a, eval_b, dataset, improvement_metric
                )
                result.append(
                    [
                        b_a_name,
                        b_b_name,
                        comparison["improvement_a"],
                        comparison["dice_tp"],
                    ]
                )
                result.append(
                    [
                        b_b_name,
                        b_a_name,
                        comparison["improvement_b"],
                        comparison["dice_tp"],
                    ]
                )
                # mark this pair as compared so the reversed ordering is skipped
                seen_pairs.add((b_a_name, b_b_name))
    result_df = pd.DataFrame(
        result, columns=["base", "other", "improvement", "dice_tp"]
    )
    return result_df
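
A usage sketch, reusing ds from the earlier example and assuming several previously computed blocking results (the variable names are hypothetical):

>>> # doctest: +SKIP
>>> from klinker.eval import multiple_block_comparison
>>> named_blocks = {"token": blocks_token, "other": blocks_other}
>>> result = multiple_block_comparison(named_blocks, ds, improvement_metric="h3r")
>>> list(result.columns)
['base', 'other', 'improvement', 'dice_tp']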