Skip to content

utils

cast_general_vector(vector, return_type)

Cast a vector to the desired type.


vector: GeneralVector: Vector to cast.
return_type: GeneralVectorLiteral: Wanted return type.


Vector in desired format

Examples:


>>> from klinker.utils import cast_general_vector
>>> import numpy as np
>>> arr = np.array([1,2,3])
>>> cast_general_vector(arr, "pt")
tensor([1, 2, 3])
>>> t_arr = cast_general_vector(arr, "pt")
>>> t_arr
tensor([1, 2, 3])
>>> cast_general_vector(t_arr, "np")
array([1, 2, 3])
Source code in klinker/utils.py
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
def cast_general_vector(
    vector: GeneralVector,
    return_type: GeneralVectorLiteral,
) -> GeneralVector:
    """Cast a vector to the desired type.

    A vector that already has the requested type is returned as-is (no copy).
    Torch tensors that track gradients or live on a non-CPU device are
    detached and moved to the CPU before being converted to numpy.

    Args:
    ----
      vector: GeneralVector: Vector to cast
      return_type: GeneralVectorLiteral: Wanted return type.

    Returns:
    -------
        Vector in desired format

    Raises:
    ------
        ValueError: If ``return_type`` matches neither the torch nor the
            numpy vector literal.

    Examples:
    --------
        >>> from klinker.utils import cast_general_vector
        >>> import numpy as np
        >>> arr = np.array([1,2,3])
        >>> cast_general_vector(arr, "pt")
        tensor([1, 2, 3])
        >>> t_arr = cast_general_vector(arr, "pt")
        >>> t_arr
        tensor([1, 2, 3])
        >>> cast_general_vector(t_arr, "np")
        array([1, 2, 3])

    """
    if return_type == TorchVectorLiteral:
        if isinstance(vector, torch.Tensor):
            return vector
        return torch.tensor(vector)
    if return_type == NumpyVectorLiteral:
        if isinstance(vector, np.ndarray):
            return vector
        if isinstance(vector, torch.Tensor):
            # np.array() refuses tensors that require grad or sit on an
            # accelerator, so strip the autograd link and bring the data
            # to host memory first.
            vector = vector.detach().cpu()
        return np.array(vector)
    raise ValueError(f"Unknown return_type: {return_type}!")

concat_frames(frames)

Concatenate dask or pandas frames.


frames: List[Frame]: List of dataframes.


concatenated dataframes
Source code in klinker/utils.py
46
47
48
49
50
51
52
53
54
55
56
57
58
59
def concat_frames(frames: List[Frame]) -> Frame:
    """Concatenate a list of frames with the matching backend.

    The backend is chosen by inspecting the first element only: a pandas
    DataFrame or Series selects ``pd.concat``, anything else is handed to
    ``dd.concat``.

    Args:
    ----
      frames: List[Frame]: Non-empty list of dataframes.

    Returns:
    -------
        concatenated dataframes
    """
    first = frames[0]
    is_pandas = isinstance(first, (pd.DataFrame, pd.Series))
    concat = pd.concat if is_pandas else dd.concat
    return concat(frames)

resolve_device(device=None)

Resolve a torch.device given a desired device (string).


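device: DeviceHint: Desired device; None or "gpu" selects CUDA. (Default value = None)

Returns: The resolved torch.device; falls back to CPU (with a logged warning) when CUDA is requested but not available.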

Source code in klinker/utils.py
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
def resolve_device(device: DeviceHint = None) -> torch.device:
    """Turn a device hint into a usable ``torch.device``.

    ``None`` and ``"gpu"`` are both treated as a request for ``"cuda"``.
    String hints are converted to ``torch.device``; other values are passed
    through unchanged. If CUDA is requested but not available, the CPU
    device is returned instead and a warning is logged.

    Args:
    ----
      device: DeviceHint: Desired device  (Default value = None)

    Returns:
    -------
        The resolved device.

    """
    # copy-pasted from pykeen
    wants_default_gpu = device is None or device == "gpu"
    hint = "cuda" if wants_default_gpu else device
    resolved = torch.device(hint) if isinstance(hint, str) else hint
    # Availability is checked first so the `.type` lookup is skipped
    # whenever CUDA is present.
    if not torch.cuda.is_available() and resolved.type == "cuda":
        resolved = torch.device("cpu")
        logger.warning("No cuda devices were available. The model runs on CPU")
    return resolved

tokenize_row(row, tokenize_fn=word_tokenize, min_token_length=1)

Tokenize rows of series.


row: pd.Series: row with values to tokenize
tokenize_fn: Callable[[str], List[str]]: Tokenization function
min_token_length: int: Discard tokens below this value


List of tokens
Source code in klinker/utils.py
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
def tokenize_row(
    row: pd.Series,
    tokenize_fn: Callable[[str], List[str]] = word_tokenize,
    min_token_length: int = 1,
) -> List:
    """Collect the distinct tokens found in all values of a row.

    Every value is converted with ``str`` before tokenization. Duplicate
    tokens are dropped, so the order of the returned list is not meaningful.

    Args:
    ----
      row: pd.Series: row with values to tokenize
      tokenize_fn: Callable[[str], List[str]]: Tokenization function
      min_token_length: int: Discard tokens shorter than this value

    Returns:
    -------
        List of unique tokens
    """
    unique_tokens = set()
    for cell in row.values:
        unique_tokens.update(
            token
            for token in tokenize_fn(str(cell))
            if len(token) >= min_token_length
        )
    return list(unique_tokens)