Skip to content

composite

BaseCompositeRelationalClusteringBlocker

Bases: BaseCompositeUniqueNameBlocker

Source code in klinker/blockers/composite.py
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
class BaseCompositeRelationalClusteringBlocker(BaseCompositeUniqueNameBlocker):
    """Composite blocker whose relational step is an attribute clustering token blocker.

    Neighbor attribute values are collected for both datasets and handed to
    ``_relation_blocker.assign`` to build the relational blocks.
    """

    # Relational blocker instance; `_compute_rel_blocks` calls its `assign`.
    _relation_blocker: AttributeClusteringTokenBlocker
    # Blocker class for the relational step
    # (presumably instantiated by the base class -- confirm there).
    _rel_blocker_cls = AttributeClusteringTokenBlocker

    def concat_relational_info(
        self,
        left: KlinkerFrame,
        right: KlinkerFrame,
        left_rel: KlinkerFrame,
        right_rel: KlinkerFrame,
    ) -> Tuple[SeriesType, SeriesType]:
        """Collect neighbor entity attribute values for both datasets.

        ``concat_neighbor_attributes`` is called once per side with
        ``include_own_attributes=False`` and ``do_not_concat_values=True``,
        passing ``self.top_n_a`` and ``self.top_n_r`` through unchanged.
        NOTE(review): given ``include_own_attributes=False`` the result
        presumably excludes an entity's own values -- confirm against
        ``concat_neighbor_attributes``.

        Args:
        ----
          left: KlinkerFrame: Frame with attribute info of left dataset.
          right: KlinkerFrame: Frame with attribute info of right dataset.
          left_rel: KlinkerFrame: Relation triples of left dataset.
          right_rel: KlinkerFrame: Relation triples of right dataset.

        Returns:
        -------
            (left_conc, right_conc) Result of ``concat_neighbor_attributes``
            for the left and for the right dataset, in that order.
        """

        def _neighbor_values(attr_frame, rel_frame):
            # Both sides use exactly the same options; only the frames differ.
            return concat_neighbor_attributes(
                attr_frame,
                rel_frame,
                include_own_attributes=False,
                top_n_a=self.top_n_a,
                top_n_r=self.top_n_r,
                do_not_concat_values=True,
            )

        left_values = _neighbor_values(left, left_rel)
        right_values = _neighbor_values(right, right_rel)
        return left_values, right_values

    def _compute_rel_blocks(
        self, left, right, left_rel, right_rel, unique_blocks
    ) -> KlinkerBlockManager:
        """Build the relational blocks from de-duplicated neighbor attribute values.

        Args:
        ----
          left: Frame with attribute info of left dataset.
          right: Frame with attribute info of right dataset.
          left_rel: Relation triples of left dataset.
          right_rel: Relation triples of right dataset.
          unique_blocks: Object whose ``blocks`` is indexed by table name; only
            read when ``self.use_unique_name`` is truthy (presumably the result
            of the unique-name blocking step -- confirm in the base class).

        Returns:
        -------
            Whatever ``self._relation_blocker.assign`` returns for the
            (optionally filtered) left and right inputs.
        """
        left_values, right_values = self.concat_relational_info(
            left=left, right=right, left_rel=left_rel, right_rel=right_rel
        )
        left_dedup = left_values.drop_duplicates()
        right_dedup = right_values.drop_duplicates()

        if not self.use_unique_name:
            # No unique-name filtering requested: block on the de-duplicated data as is.
            return self._relation_blocker.assign(left=left_dedup, right=right_dedup)

        # Filter each side with the unique-name blocks stored under its table name.
        left_kept = filter_with_unique(left_dedup, unique_blocks.blocks[left.table_name])
        right_kept = filter_with_unique(
            right_dedup, unique_blocks.blocks[right.table_name]
        )
        return self._relation_blocker.assign(left=left_kept, right=right_kept)

concat_relational_info(left, right, left_rel, right_rel)

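Concatenate the attribute values of neighboring entities. (The listing passes `include_own_attributes=False`, so an entity's own values do not appear to be included.)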


Parameters:

- left (KlinkerFrame): Frame with attribute info of the left dataset.
- right (KlinkerFrame): Frame with attribute info of the right dataset.
- left_rel (KlinkerFrame): Relation triples of the left dataset.
- right_rel (KlinkerFrame): Relation triples of the right dataset.


Returns: (left_conc, right_conc), the concatenated entity attribute values for the left and right datasets.
Source code in klinker/blockers/composite.py
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
def concat_relational_info(
    self,
    left: KlinkerFrame,
    right: KlinkerFrame,
    left_rel: KlinkerFrame,
    right_rel: KlinkerFrame,
) -> Tuple[SeriesType, SeriesType]:
    """Collect neighbor entity attribute values for both datasets.

    ``concat_neighbor_attributes`` is called once per side with
    ``include_own_attributes=False`` and ``do_not_concat_values=True``,
    passing ``self.top_n_a`` and ``self.top_n_r`` through unchanged.
    NOTE(review): given ``include_own_attributes=False`` the result
    presumably excludes an entity's own values -- confirm against
    ``concat_neighbor_attributes``.

    Args:
    ----
      left: KlinkerFrame: Frame with attribute info of left dataset.
      right: KlinkerFrame: Frame with attribute info of right dataset.
      left_rel: KlinkerFrame: Relation triples of left dataset.
      right_rel: KlinkerFrame: Relation triples of right dataset.

    Returns:
    -------
        (left_conc, right_conc) Result of ``concat_neighbor_attributes``
        for the left and for the right dataset, in that order.
    """

    def _neighbor_values(attr_frame, rel_frame):
        # Both sides use exactly the same options; only the frames differ.
        return concat_neighbor_attributes(
            attr_frame,
            rel_frame,
            include_own_attributes=False,
            top_n_a=self.top_n_a,
            top_n_r=self.top_n_r,
            do_not_concat_values=True,
        )

    left_values = _neighbor_values(left, left_rel)
    right_values = _neighbor_values(right, right_rel)
    return left_values, right_values