Skip to content

base

Blocker

Bases: ABC

Abstract Blocker class.

Source code in klinker/blockers/base.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
class Blocker(abc.ABC):
    """Abstract Blocker class.

    Concrete blockers must implement `assign`, which takes the two datasets
    (and optionally their relational information) and returns the resulting
    blocks.
    """

    @abc.abstractmethod
    def assign(
        self,
        left: KlinkerFrame,
        right: KlinkerFrame,
        left_rel: Optional[KlinkerFrame] = None,
        right_rel: Optional[KlinkerFrame] = None,
    ) -> KlinkerBlockManager:
        """Assign entity ids to blocks.

        Args:
          left: Contains entity attribute information of left dataset.
          right: Contains entity attribute information of right dataset.
          left_rel: Contains relational information of left dataset. (Default value = None)
          right_rel: Contains relational information of right dataset. (Default value = None)

        Returns:
            KlinkerBlockManager: instance holding the resulting blocks.
        """

assign(left, right, left_rel=None, right_rel=None) abstractmethod

Assign entity ids to blocks.

Parameters:

Name Type Description Default
left KlinkerFrame

KlinkerFrame: Contains entity attribute information of left dataset.

required
right KlinkerFrame

KlinkerFrame: Contains entity attribute information of right dataset.

required
left_rel Optional[KlinkerFrame]

Optional[KlinkerFrame]: (Default value = None) Contains relational information of left dataset.

None
right_rel Optional[KlinkerFrame]

Optional[KlinkerFrame]: (Default value = None) Contains relational information of right dataset.

None

Returns:

Name Type Description
KlinkerBlockManager KlinkerBlockManager

instance holding the resulting blocks.

Source code in klinker/blockers/base.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
@abc.abstractmethod
def assign(
    self,
    left: KlinkerFrame,
    right: KlinkerFrame,
    left_rel: Optional[KlinkerFrame] = None,
    right_rel: Optional[KlinkerFrame] = None,
) -> KlinkerBlockManager:
    """Assign entity ids to blocks.

    Args:
      left: Contains entity attribute information of left dataset.
      right: Contains entity attribute information of right dataset.
      left_rel: Contains relational information of left dataset. (Default value = None)
      right_rel: Contains relational information of right dataset. (Default value = None)

    Returns:
        KlinkerBlockManager: instance holding the resulting blocks.
    """

SchemaAgnosticBlocker

Bases: Blocker

Base class for schema-agnostic Blockers

Source code in klinker/blockers/base.py
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
class SchemaAgnosticBlocker(Blocker):
    """Base class for schema-agnostic Blockers.

    The public `assign` concatenates the attribute values of each dataset
    and hands the results to `_assign`, which subclasses must implement.
    """

    @abc.abstractmethod
    def _assign(
        self,
        left: SeriesType,
        right: SeriesType,
        left_rel: Optional[KlinkerFrame] = None,
        right_rel: Optional[KlinkerFrame] = None,
    ) -> KlinkerBlockManager:
        """Assign entity ids to blocks.

        Args:
          left: Concatenated entity attribute values of left dataset as series.
          right: Concatenated entity attribute values of right dataset as series.
          left_rel: Contains relational information of left dataset. (Default value = None)
          right_rel: Contains relational information of right dataset. (Default value = None)

        Returns:
            KlinkerBlockManager: instance holding the resulting blocks.
        """

    def assign(
        self,
        left: KlinkerFrame,
        right: KlinkerFrame,
        left_rel: Optional[KlinkerFrame] = None,
        right_rel: Optional[KlinkerFrame] = None,
    ) -> KlinkerBlockManager:
        """Assign entity ids to blocks.

        Concatenates all entity attribute information of each dataset first,
        then delegates to `_assign`.

        Args:
          left: Contains entity attribute information of left dataset.
          right: Contains entity attribute information of right dataset.
          left_rel: Contains relational information of left dataset. (Default value = None)
          right_rel: Contains relational information of right dataset. (Default value = None)

        Returns:
            KlinkerBlockManager: instance holding the resulting blocks.
        """
        # Reduce the left frame before the right one, as the original did.
        left_values = left.concat_values()
        right_values = right.concat_values()
        # The relational frames are forwarded to `_assign` unchanged.
        return self._assign(
            left=left_values,
            right=right_values,
            left_rel=left_rel,
            right_rel=right_rel,
        )

assign(left, right, left_rel=None, right_rel=None)

Assign entity ids to blocks.

Will concat all entity attribute information before proceeding.

Parameters:

Name Type Description Default
left KlinkerFrame

KlinkerFrame: Contains entity attribute information of left dataset.

required
right KlinkerFrame

KlinkerFrame: Contains entity attribute information of right dataset.

required
left_rel Optional[KlinkerFrame]

Optional[KlinkerFrame]: (Default value = None) Contains relational information of left dataset.

None
right_rel Optional[KlinkerFrame]

Optional[KlinkerFrame]: (Default value = None) Contains relational information of right dataset.

None

Returns:

Name Type Description
KlinkerBlockManager KlinkerBlockManager

instance holding the resulting blocks.

Source code in klinker/blockers/base.py
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
def assign(
    self,
    left: KlinkerFrame,
    right: KlinkerFrame,
    left_rel: Optional[KlinkerFrame] = None,
    right_rel: Optional[KlinkerFrame] = None,
) -> KlinkerBlockManager:
    """Assign entity ids to blocks.

    Concatenates all entity attribute information of each dataset first,
    then delegates to `_assign`.

    Args:
      left: Contains entity attribute information of left dataset.
      right: Contains entity attribute information of right dataset.
      left_rel: Contains relational information of left dataset. (Default value = None)
      right_rel: Contains relational information of right dataset. (Default value = None)

    Returns:
        KlinkerBlockManager: instance holding the resulting blocks.
    """
    # Reduce the left frame before the right one, as the original did.
    left_values = left.concat_values()
    right_values = right.concat_values()
    # The relational frames are forwarded to `_assign` unchanged.
    return self._assign(
        left=left_values,
        right=right_values,
        left_rel=left_rel,
        right_rel=right_rel,
    )