
utils

cast_general_vector(vector, return_type)

Cast a vector to the desired type.

Parameters:

    vector (GeneralVector): Vector to cast. Required.
    return_type (GeneralVectorLiteral): Wanted return type. Required.

Returns:

    GeneralVector: Vector in desired format.

Examples:

>>> from klinker.utils import cast_general_vector
>>> import numpy as np
>>> arr = np.array([1,2,3])
>>> cast_general_vector(arr, "pt")
tensor([1, 2, 3])
>>> t_arr = cast_general_vector(arr, "pt")
>>> t_arr
tensor([1, 2, 3])
>>> cast_general_vector(t_arr, "np")
array([1, 2, 3])
Source code in klinker/utils.py
def cast_general_vector(
    vector: GeneralVector,
    return_type: GeneralVectorLiteral,
) -> GeneralVector:
    """Cast a vector to the desired type

    Args:
      vector: GeneralVector: Vector to cast
      return_type: GeneralVectorLiteral: Wanted return type.

    Returns:
        Vector in desired format

    Examples:

        >>> from klinker.utils import cast_general_vector
        >>> import numpy as np
        >>> arr = np.array([1,2,3])
        >>> cast_general_vector(arr, "pt")
        tensor([1, 2, 3])
        >>> t_arr = cast_general_vector(arr, "pt")
        >>> t_arr
        tensor([1, 2, 3])
        >>> cast_general_vector(t_arr, "np")
        array([1, 2, 3])

    """
    if return_type == TorchVectorLiteral:
        return torch.tensor(vector) if not isinstance(vector, torch.Tensor) else vector
    elif return_type == NumpyVectorLiteral:
        return np.array(vector) if not isinstance(vector, np.ndarray) else vector
    else:
        raise ValueError(f"Unknown return_type: {return_type}!")
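
One additional property worth noting (an illustrative sketch, not part of the original examples): if the input already has the requested type, it is returned unchanged rather than copied:

>>> import numpy as np
>>> from klinker.utils import cast_general_vector
>>> arr = np.array([1, 2, 3])
>>> cast_general_vector(arr, "np") is arr
True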

concat_frames(frames)

Concatenate dask or pandas frames.

Parameters:

    frames (List[Frame]): List of dataframes. Required.

Returns:

    Frame: Concatenated dataframe.

Source code in klinker/utils.py
def concat_frames(frames: List[Frame]) -> Frame:
    """Concatenate dask or pandas frames.

    Args:
      frames: List[Frame]: List of dataframes.

    Returns:
        concatenated dataframes
    """
    if isinstance(frames[0], pd.DataFrame):
        return pd.concat(frames)
    return dd.concat(frames)
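
A short usage sketch (added for illustration, not from the original docstring), assuming plain pandas frames; dask frames are routed through dd.concat in the same way:

>>> import pandas as pd
>>> from klinker.utils import concat_frames
>>> left = pd.DataFrame({"a": [1, 2]})
>>> right = pd.DataFrame({"a": [3, 4]})
>>> concat_frames([left, right])
   a
0  1
1  2
0  3
1  4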

resolve_device(device=None)

Resolve a torch.device given a desired device (string).

Parameters:

    device (DeviceHint): Desired device hint. Defaults to None.

Returns:

    torch.device: The resolved device.

Source code in klinker/utils.py
def resolve_device(device: DeviceHint = None) -> torch.device:
    """Resolve a torch.device given a desired device (string).

    Args:
      device: DeviceHint:  (Default value = None)

    Returns:

    """
    # copy-pasted from pykeen
    if device is None or device == "gpu":
        device = "cuda"
    if isinstance(device, str):
        device = torch.device(device)
    if not torch.cuda.is_available() and device.type == "cuda":
        device = torch.device("cpu")
        logger.warning("No cuda devices were available. The model runs on CPU")
    return device
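
A minimal usage sketch (illustrative, not from the original docs); the device actually returned depends on whether CUDA is available on the machine:

>>> import torch
>>> from klinker.utils import resolve_device
>>> device = resolve_device("gpu")  # "gpu" and None resolve to CUDA if available, else CPU
>>> x = torch.zeros(3, device=device)
>>> resolve_device(torch.device("cpu")).type
'cpu'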

tokenize_row(row, tokenize_fn=word_tokenize, min_token_length=1)

Tokenize the values of a row given as a pandas Series.

Parameters:

    row (pd.Series): Row with values to tokenize. Required.
    tokenize_fn (Callable[[str], List[str]]): Tokenization function. Defaults to word_tokenize.
    min_token_length (int): Discard tokens shorter than this value. Defaults to 1.

Returns:

    List: List of tokens.

Source code in klinker/utils.py
def tokenize_row(
    row: pd.Series,
    tokenize_fn: Callable[[str], List[str]] = word_tokenize,
    min_token_length: int = 1,
) -> List:
    """Tokenize rows of series.

    Args:
      row: pd.Series: row with values to tokenize
      tokenize_fn: Callable[[str], List[str]]: Tokenization function
      min_token_length: int: Discard tokens below this value

    Returns:
        List of tokens
    """
    res = []
    for value in row.values:
        res.extend(
            list(
                filter(
                    lambda x: len(x) >= min_token_length,
                    tokenize_fn(str(value)),
                )
            )
        )
    return list(set(res))
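
A small usage sketch (illustrative, not from the original docstring); str.split is passed as tokenize_fn so the example does not depend on NLTK's tokenizer data, and the result is sorted because the returned token set has no guaranteed order:

>>> import pandas as pd
>>> from klinker.utils import tokenize_row
>>> row = pd.Series({"name": "Albert Einstein", "birth_year": 1879})
>>> sorted(tokenize_row(row, tokenize_fn=str.split, min_token_length=2))
['1879', 'Albert', 'Einstein']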