Skip to content

eval

Evaluation

Class used for evaluation.

Source code in klinker/eval.py
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
class Evaluation:
    """Quality metrics of a blocking result measured against a gold standard.

    Attributes set by ``__init__``:
        gold_pair_set: all gold standard pairs.
        tp_set / true_positive: the given true positive pairs and their count.
        fn_set / false_negative: gold pairs missing from ``tp_set`` and their count.
        false_positive: number of produced pairs that are not gold pairs.
        false_positive_set: the false positive pairs themselves, or ``None``.
        comp_with_blocking: number of candidate pairs produced by the blocking.
        comp_without_blocking: number of comparisons needed without blocking.
    """

    def __init__(
        self,
        *,
        true_positive_set: Set[Tuple[Any, Any]],
        gold_pair_set: Set[Tuple[Any, Any]],
        false_positive: int,
        comp_with_blocking: int,
        comp_without_blocking: int,
        false_positive_set: Optional[Set[Tuple[Any, Any]]] = None,
    ):
        """Store the raw counts; every metric is derived lazily via properties."""
        self.gold_pair_set = gold_pair_set
        self.comp_without_blocking = comp_without_blocking
        self.tp_set = true_positive_set
        self.fn_set = self.gold_pair_set - self.tp_set  # type: ignore
        self.false_negative = len(self.fn_set)
        self.true_positive = len(self.tp_set)
        self.false_positive = false_positive
        self.comp_with_blocking = comp_with_blocking
        self.false_positive_set = false_positive_set

    @staticmethod
    def _check_consistency(blocks: "KlinkerBlockManager", gold: pd.DataFrame):
        """Validate that ``blocks`` and ``gold`` describe the same two columns.

        The check is skipped entirely for the NN-based block managers.

        Raises:
        ------
            ValueError: if ``gold`` does not have exactly two columns or its
                column names differ from those of ``blocks.blocks``.
        """
        if isinstance(
            blocks,
            (CompositeWithNNBasedKlinkerBlockManager, NNBasedKlinkerBlockManager),
        ):
            return
        if len(gold.columns) != 2:
            raise ValueError("Only binary matching supported!")
        if set(blocks.blocks.columns) != set(gold.columns):
            raise ValueError(
                "Blocks and gold standard frame need to have the same columns!"
            )

    @staticmethod
    def _classify_pairs(
        pairs,
        gold_pair_set: Set[Tuple[Any, Any]],
        keep_false_positive_set: bool = False,
    ) -> Tuple[Set[Tuple[Any, Any]], int, Optional[Set[Tuple[Any, Any]]], int]:
        """Split candidate pairs into true and false positives.

        Args:
        ----
          pairs: iterable of ``(left, right)`` candidate pairs
          gold_pair_set: gold standard pairs
          keep_false_positive_set: whether to collect the false positive pairs

        Returns:
        -------
            ``(true positive pairs, false positive count,
            false positive pairs or None, number of pairs seen)``
        """
        tp_pairs: Set[Tuple[Any, Any]] = set()
        fp_set: Optional[Set[Tuple[Any, Any]]] = (
            set() if keep_false_positive_set else None
        )
        fp = 0
        # Counted by hand so that an empty iterable yields 0 instead of
        # leaving a loop variable unbound.
        pair_count = 0
        for pair in pairs:
            pair_count += 1
            # Unpack up front: both branches need the pair that is currently
            # being looked at, not one left over from an earlier iteration.
            left, right = pair
            if pair in gold_pair_set:
                tp_pairs.add((left, right))
            else:
                fp += 1
                # Compare against None: a freshly created set is empty and
                # therefore falsy.
                if fp_set is not None:
                    fp_set.add((left, right))
        return tp_pairs, fp, fp_set, pair_count

    @classmethod
    def from_blocks_and_gold(
        cls,
        blocks: "KlinkerBlockManager",
        gold: pd.DataFrame,
        left_data_len: int,
        right_data_len: int,
        keep_false_positive_set: bool = False,
    ) -> "Evaluation":
        """Build an evaluation by checking every blocked pair against ``gold``.

        Args:
        ----
          blocks: KlinkerBlockManager: Calculated blocks
          gold: two-column frame holding the gold standard pairs
          left_data_len: number of entries in the left data
          right_data_len: number of entries in the right data
          keep_false_positive_set: Whether to keep the false positive pairs

        Returns:
        -------
            eval instance

        Raises:
        ------
            ValueError: if ``blocks`` and ``gold`` are inconsistent
        """
        Evaluation._check_consistency(blocks, gold)

        left_col = gold.columns[0]
        right_col = gold.columns[1]

        gold_pair_set = set(zip(gold[left_col], gold[right_col]))
        tp_pairs, fp, fp_set, pair_count = cls._classify_pairs(
            blocks.all_pairs(),
            gold_pair_set,
            keep_false_positive_set=keep_false_positive_set,
        )
        comp_without_blocking = left_data_len * right_data_len
        return cls(
            true_positive_set=tp_pairs,
            gold_pair_set=gold_pair_set,
            false_positive=fp,
            comp_with_blocking=pair_count,
            comp_without_blocking=comp_without_blocking,
            false_positive_set=fp_set,
        )

    @classmethod
    def from_dataset(
        cls,
        blocks: "KlinkerBlockManager",
        dataset: "KlinkerDataset",
        keep_false_positive_set: bool = False,
    ) -> "Evaluation":
        """Helper function to initialise evaluation with dataset.

        Args:
        ----
          blocks: KlinkerBlockManager: Calculated blocks
          dataset: KlinkerDataset: Dataset that was used for blocking
          keep_false_positive_set: Whether to keep the false positive pairs

        Returns:
        -------
            eval instance

        Examples:
        --------
            >>> # doctest: +SKIP
            >>> from sylloge import MovieGraphBenchmark
            >>> from klinker.data import KlinkerDataset
            >>> ds = KlinkerDataset.from_sylloge(MovieGraphBenchmark(),clean=True)
            >>> from klinker.blockers import TokenBlocker
            >>> blocks = TokenBlocker().assign(left=ds.left, right=ds.right)
            >>> from klinker.eval import Evaluation
            >>> ev = Evaluation.from_dataset(blocks, ds)
            >>> ev.to_dict()
            {'recall': 0.993933265925177, 'precision': 0.002804877004859314, 'f_measure': 0.005593967847488974, 'reduction_ratio': 0.9985747694185365, 'h3r': 0.9962486115318822, 'pairs_completeness': 0.993933265925177}

        """
        return cls.from_blocks_and_gold(
            blocks=blocks,
            gold=dataset.gold,
            left_data_len=len(dataset.left),
            right_data_len=len(dataset.right),
            keep_false_positive_set=keep_false_positive_set,
        )

    @classmethod
    def from_joined_evals(
        cls, eval_a: "Evaluation", eval_b: "Evaluation"
    ) -> "Evaluation":
        """Combine two evaluations that were computed on the same dataset.

        The true positive sets are united. False positive counts and
        comparison counts are added together; pairs produced by both results
        are not deduplicated here. The joined instance carries no false
        positive set.

        Raises:
        ------
            ValueError: if the gold pairs or ``comp_without_blocking`` differ
        """
        if (
            eval_a.gold_pair_set != eval_b.gold_pair_set
            or eval_a.comp_without_blocking != eval_b.comp_without_blocking
        ):
            raise ValueError("Can only join on identical datasets!")
        joined_tp_set = eval_a.tp_set.union(eval_b.tp_set)
        joined_comp_with_blocking = (
            eval_a.comp_with_blocking + eval_b.comp_with_blocking
        )
        joined_fp = eval_a.false_positive + eval_b.false_positive
        return cls(
            true_positive_set=joined_tp_set,
            gold_pair_set=eval_a.gold_pair_set,
            false_positive=joined_fp,
            comp_with_blocking=joined_comp_with_blocking,
            comp_without_blocking=eval_a.comp_without_blocking,
        )

    @property
    def pairs_completeness(self) -> float:
        """True positives divided by the number of gold pairs (0 if none)."""
        gold_count = len(self.gold_pair_set)
        if gold_count == 0:
            # Same convention as recall/precision for an empty denominator.
            return 0.0
        return self.true_positive / gold_count

    @property
    def recall(self) -> float:
        """True positives divided by true positives plus false negatives."""
        denom = self.true_positive + self.false_negative
        if denom == 0:
            return 0.0
        return self.true_positive / denom

    @property
    def precision(self) -> float:
        """True positives divided by true positives plus false positives."""
        denom = self.true_positive + self.false_positive
        if denom == 0:
            return 0.0
        return self.true_positive / denom

    @property
    def f_measure(self) -> float:
        """Harmonic mean of recall and precision."""
        rec = self.recall
        prec = self.precision
        return harmonic_mean(a=rec, b=prec)

    @property
    def reduction_ratio(self) -> float:
        """Share of comparisons saved by blocking (0 if nothing to compare)."""
        if self.comp_without_blocking == 0:
            # Same convention as recall/precision for an empty denominator.
            return 0.0
        return 1 - (self.comp_with_blocking / self.comp_without_blocking)

    @property
    def h3r(self) -> float:
        """Harmonic mean of reduction ratio and recall."""
        rr = self.reduction_ratio
        rec = self.recall
        return harmonic_mean(a=rr, b=rec)

    def __repr__(self) -> str:
        return f"Evaluation: {self.to_dict()}"

    def to_dict(self) -> Dict[str, float]:
        """Return all metrics keyed by their name."""
        return {
            "recall": self.recall,
            "precision": self.precision,
            "f_measure": self.f_measure,
            "reduction_ratio": self.reduction_ratio,
            "h3r": self.h3r,
            "pairs_completeness": self.pairs_completeness,
        }

from_dataset(blocks, dataset, keep_false_positive_set=False) classmethod

Helper function to initialise evaluation with dataset.


blocks: KlinkerBlockManager: Calculated blocks; dataset: KlinkerDataset: Dataset that was used for blocking; keep_false_positive_set: Whether to keep the set of false positive pairs


eval instance

Examples:


>>> # doctest: +SKIP
>>> from sylloge import MovieGraphBenchmark
>>> from klinker.data import KlinkerDataset
>>> ds = KlinkerDataset.from_sylloge(MovieGraphBenchmark(),clean=True)
>>> from klinker.blockers import TokenBlocker
>>> blocks = TokenBlocker().assign(left=ds.left, right=ds.right)
>>> from klinker.eval import Evaluation
>>> ev = Evaluation.from_dataset(blocks, ds)
>>> ev.to_dict()
{'recall': 0.993933265925177, 'precision': 0.002804877004859314, 'f_measure': 0.005593967847488974, 'reduction_ratio': 0.9985747694185365, 'h3r': 0.9962486115318822, 'pairs_completeness': 0.993933265925177}
Source code in klinker/eval.py
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
@classmethod
def from_dataset(
    cls,
    blocks: KlinkerBlockManager,
    dataset: KlinkerDataset,
    keep_false_positive_set: bool = False,
) -> "Evaluation":
    """Create an evaluation straight from a dataset and its blocks.

    The gold standard and the sizes of both sides are read from ``dataset``
    and handed on to ``from_blocks_and_gold``.

    Args:
    ----
      blocks: KlinkerBlockManager: Calculated blocks
      dataset: KlinkerDataset: Dataset that was used for blocking
      keep_false_positive_set: Whether to keep the false positive pairs

    Returns:
    -------
        eval instance

    Examples:
    --------
        >>> # doctest: +SKIP
        >>> from sylloge import MovieGraphBenchmark
        >>> from klinker.data import KlinkerDataset
        >>> ds = KlinkerDataset.from_sylloge(MovieGraphBenchmark(),clean=True)
        >>> from klinker.blockers import TokenBlocker
        >>> blocks = TokenBlocker().assign(left=ds.left, right=ds.right)
        >>> from klinker.eval import Evaluation
        >>> ev = Evaluation.from_dataset(blocks, ds)
        >>> ev.to_dict()
        {'recall': 0.993933265925177, 'precision': 0.002804877004859314, 'f_measure': 0.005593967847488974, 'reduction_ratio': 0.9985747694185365, 'h3r': 0.9962486115318822, 'pairs_completeness': 0.993933265925177}

    """
    build = cls.from_blocks_and_gold
    gold_frame = dataset.gold
    left_size = len(dataset.left)
    right_size = len(dataset.right)
    return build(
        blocks=blocks,
        gold=gold_frame,
        left_data_len=left_size,
        right_data_len=right_size,
        keep_false_positive_set=keep_false_positive_set,
    )

compare_blocks(blocks_a, blocks_b, dataset, improvement_metrics='h3r')

Compare similarity between blocks using calculated eval.


blocks_a: KlinkerBlockManager: one blocking result; blocks_b: KlinkerBlockManager: other blocking result; dataset: KlinkerDataset: dataset from which blocks were calculated; improvement_metrics: Union[str, List[str]]: Metric(s) used for calculating improvement


Dictionary with improvement metrics.
Source code in klinker/eval.py
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
def compare_blocks(
    blocks_a: KlinkerBlockManager,
    blocks_b: KlinkerBlockManager,
    dataset: KlinkerDataset,
    improvement_metrics: Union[str, List[str]] = "h3r",
) -> Dict:
    """Evaluate two blocking results on one dataset and compare them.

    Both results are evaluated against ``dataset`` first; the comparison
    itself is delegated to ``compare_blocks_from_eval``.

    Args:
    ----
      blocks_a: KlinkerBlockManager: one blocking result
      blocks_b: KlinkerBlockManager: other blocking result
      dataset: KlinkerDataset: dataset from which blocks were calculated
      improvement_metrics: Union[str, List[str]]: Metric(s) used for calculating improvement

    Returns:
    -------
        Dictionary with improvement metrics.
    """
    eval_a, eval_b = [
        Evaluation.from_dataset(blocks=candidate, dataset=dataset)
        for candidate in (blocks_a, blocks_b)
    ]
    comparison = compare_blocks_from_eval(
        blocks_a=blocks_a,
        blocks_b=blocks_b,
        eval_a=eval_a,
        eval_b=eval_b,
        dataset=dataset,
        improvement_metrics=improvement_metrics,
    )
    return comparison

compare_blocks_from_eval(blocks_a, blocks_b, eval_a, eval_b, dataset, improvement_metrics='h3r')

Compare similarity between blocks using calculated eval.


blocks_a: KlinkerBlockManager: one blocking result; blocks_b: KlinkerBlockManager: other blocking result; eval_a: Evaluation: eval of a; eval_b: Evaluation: eval of b; dataset: KlinkerDataset: dataset from which blocks were calculated; improvement_metrics: Union[str, List[str]]: Metric(s) used for calculating improvement


Dictionary with improvement metrics.
Source code in klinker/eval.py
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
def compare_blocks_from_eval(
    blocks_a: KlinkerBlockManager,
    blocks_b: KlinkerBlockManager,
    eval_a: Evaluation,
    eval_b: Evaluation,
    dataset: KlinkerDataset,
    improvement_metrics: Union[str, List[str]] = "h3r",
) -> Dict:
    """Compare similarity between blocks using calculated eval.

    The two evaluations are joined, and for every requested metric the
    relative change from each single evaluation to the joined one is
    reported.

    Args:
    ----
      blocks_a: KlinkerBlockManager: one blocking result (not read here)
      blocks_b: KlinkerBlockManager: other blocking result (not read here)
      eval_a: Evaluation: eval of a
      eval_b: Evaluation: eval of b
      dataset: KlinkerDataset: dataset from which blocks were calculated (not read here)
      improvement_metrics: Union[str, List[str]]: Metric(s) used for calculating improvement

    Returns:
    -------
        Dictionary with the keys ``eval_a``, ``eval_b``, ``dice_tp`` and
        ``eval_both`` plus ``improvement_a_<metric>`` and
        ``improvement_b_<metric>`` for every requested metric.

    Raises:
    ------
        KeyError: if a requested metric is not a key of ``Evaluation.to_dict()``
    """
    if isinstance(improvement_metrics, str):
        improvement_metrics = [improvement_metrics]

    def percent_improvement(new: float, old: float) -> float:
        if old == 0:
            # A relative change against a zero baseline is not finite:
            # report nan when nothing changed, otherwise a signed infinity.
            if new == 0:
                return float("nan")
            return float("inf") if new > 0 else float("-inf")
        return (new - old) / old

    dice_tp = dice(eval_a.tp_set, eval_b.tp_set)
    eval_both = Evaluation.from_joined_evals(eval_a, eval_b)
    result_dict = {
        "eval_a": eval_a,
        "eval_b": eval_b,
        "dice_tp": dice_tp,
        "eval_both": eval_both,
    }

    for im in improvement_metrics:
        eval_both_metric = eval_both.to_dict()[im]
        result_dict[f"improvement_a_{im}"] = percent_improvement(
            eval_both_metric, eval_a.to_dict()[im]
        )
        result_dict[f"improvement_b_{im}"] = percent_improvement(
            eval_both_metric, eval_b.to_dict()[im]
        )
    return result_dict

dice(a, b)

Calculate Soerensen-Dice Coefficient.

Source code in klinker/eval.py
329
330
331
def dice(a: Set, b: Set) -> float:
    """Calculate Soerensen-Dice Coefficient.

    Computed as ``2 * |a & b| / (|a| + |b|)``. Two empty sets are treated as
    identical and give 1.0 instead of raising ``ZeroDivisionError``.
    """
    total = len(a) + len(b)
    if total == 0:
        return 1.0
    return (2 * len(a.intersection(b))) / total

harmonic_mean(a, b)

Calculate harmonic mean between a and b.

Source code in klinker/eval.py
15
16
17
18
19
def harmonic_mean(a: float, b: float) -> float:
    """Return the harmonic mean of a and b, or 0 when they sum to zero."""
    total = a + b
    return 2 * ((a * b) / total) if total != 0 else 0

multiple_block_comparison(blocks, dataset, improvement_metrics='h3r')

Compare multiple blocking strategies.


blocks: Dict[str, KlinkerBlockManager]: Blocking results; dataset: KlinkerDataset: Dataset that was used for blocking; improvement_metrics: Union[str, List[str]]: Metric(s) used for calculating improvement


DataFrame with improvement values.
Source code in klinker/eval.py
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
def multiple_block_comparison(
    blocks: Dict[str, KlinkerBlockManager],
    dataset: KlinkerDataset,
    improvement_metrics: Union[str, List[str]] = "h3r",
) -> pd.DataFrame:
    """Evaluate several blocking results and compare them with each other.

    Every blocking result is evaluated against ``dataset``; the comparison
    is then delegated to ``multiple_block_comparison_from_eval``.

    Args:
    ----
      blocks: Dict[str, KlinkerBlockManager]: Blocking results
      dataset: KlinkerDataset: Dataset that was used for blocking
      improvement_metrics: Union[str, List[str]]: Metric(s) used for calculating improvement

    Returns:
    -------
        DataFrame with improvement values.
    """
    blocks_with_eval = OrderedDict()
    for name, blk in blocks.items():
        evaluation = Evaluation.from_dataset(blocks=blk, dataset=dataset)
        blocks_with_eval[name] = (blk, evaluation)
    return multiple_block_comparison_from_eval(
        blocks_with_eval, dataset, improvement_metrics=improvement_metrics
    )

multiple_block_comparison_from_eval(blocks_with_eval, dataset, improvement_metrics='h3r')

Compare multiple blocking strategies.


blocks_with_eval: Dict[str, Tuple[KlinkerBlockManager, Evaluation]]: Blocking results and Evaluations; dataset: KlinkerDataset: Dataset that was used for blocking; improvement_metrics: Union[str, List[str]]: Metric(s) used for calculating improvement


DataFrame with improvement values.
Source code in klinker/eval.py
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
def multiple_block_comparison_from_eval(
    blocks_with_eval: "Dict[str, Tuple[KlinkerBlockManager, Evaluation]]",
    dataset: "KlinkerDataset",
    improvement_metrics: Union[str, List[str]] = "h3r",
) -> pd.DataFrame:
    """Compare multiple blocking strategies.

    Every unordered pair of entries is compared once; the outcome is written
    as two rows, one with each entry acting as ``base``.

    Args:
    ----
      blocks_with_eval: Dict[str, Tuple[KlinkerBlockManager, Evaluation]]: Blocking results and Evaluations
      dataset: KlinkerDataset: Dataset that was used for blocking
      improvement_metrics: Union[str, List[str]]: Metric(s) used for calculating improvement

    Returns:
    -------
        DataFrame with the columns ``base``, ``other``, one
        ``improvement_<metric>`` column per requested metric, and ``dice_tp``.
    """
    # A bare string would otherwise be iterated character by character when
    # the improvement columns are collected below.
    if isinstance(improvement_metrics, str):
        improvement_metrics = [improvement_metrics]

    entries = list(blocks_with_eval.items())
    result = []
    for position, (b_a_name, (blocks_a, eval_a)) in enumerate(entries):
        # Only look at entries after the current one, so each unordered pair
        # is compared exactly once and never with itself.
        for b_b_name, (blocks_b, eval_b) in entries[position + 1 :]:
            comparison = compare_blocks_from_eval(
                blocks_a,
                blocks_b,
                eval_a,
                eval_b,
                dataset,
                improvement_metrics=improvement_metrics,
            )
            comparison_a = [
                comparison[f"improvement_a_{im}"] for im in improvement_metrics
            ]
            comparison_b = [
                comparison[f"improvement_b_{im}"] for im in improvement_metrics
            ]
            result.append(
                [b_a_name, b_b_name, *comparison_a, comparison["dice_tp"]]
            )
            result.append(
                [b_b_name, b_a_name, *comparison_b, comparison["dice_tp"]]
            )
    im_cols = [f"improvement_{im}" for im in improvement_metrics]
    return pd.DataFrame(result, columns=["base", "other", *im_cols, "dice_tp"])