# Source code for KGE.ns_strategy

import numpy as np
import tensorflow as tf

from .utils import ns_with_same_type

class NegativeSampler:
    """ A base module for negative sampler.

    Both :code:`__init__` and :code:`__call__` unconditionally raise
    :code:`NotImplementedError`; a concrete sampler has to override both.
    """

    def __init__(self, *args, **kwargs):
        """ Initialize negative sampler.

        Any arguments are accepted and ignored, so a subclass that forgets to
        override this method fails with the explanatory error below instead
        of a :code:`TypeError` about the number of arguments.

        Raises
        ------
        NotImplementedError
            always, the base class can not be instantiated.
        """
        raise NotImplementedError("subclass of NegativeSampler should implement __init__() to init class")

    def __call__(self, *args, **kwargs):
        """ Conduct negative sampling.

        Any arguments are accepted and ignored (the samplers in this module
        are called as :code:`sampler(X, negative_ratio, side)`), so a subclass
        that forgets to override this method fails with the explanatory
        error below instead of a :code:`TypeError`.

        Raises
        ------
        NotImplementedError
            always, the base class does not sample anything.
        """
        raise NotImplementedError("subclass of NegativeSampler should implement __call__() to conduct negative sampling")


class UniformStrategy(NegativeSampler):
    """ An implementation of uniform negative sampling.

    Uniform sampling is the simplest negative sampling strategy and is
    usually the default setting of knowledge graph embedding models. It
    samples entities from all entities with a uniform distribution, and
    replaces either the head or the tail entity.
    """

    def __init__(self, sample_pool):
        """ Initialize UniformStrategy negative sampler.

        Parameters
        ----------
        sample_pool : tf.Tensor
            pool of entities to sample from.
        """
        self.sample_pool = sample_pool

    def __call__(self, X, negative_ratio, side):
        """ Perform negative sampling.

        Parameters
        ----------
        X : tf.Tensor
            positive triplets to be corrupted.
        negative_ratio : int
            number of negative samples drawn per row of :code:`X`.
        side : str
            side to corrupt, :code:`'h'` or :code:`'t'`. Accepted for a
            uniform sampler interface, this method does not read it.

        Returns
        -------
        tf.Tensor
            sampled entities, one entry per drawn index, that is
            :code:`X.shape[0] * negative_ratio` entries in total.
        """
        # The cast pool is stored back on the instance, so later calls reuse
        # the already converted tensor.
        pool = tf.cast(self.sample_pool, X.dtype)
        self.sample_pool = pool

        n_draws = X.shape[0] * negative_ratio
        # One random position into the pool per draw; the trailing axis of
        # size 1 is the index format tf.gather_nd consumes.
        positions = tf.random.uniform(
            shape=[n_draws, 1],
            minval=0,
            maxval=len(pool),
            dtype=pool.dtype,
        )

        return tf.gather_nd(pool, positions)

class TypedStrategy(NegativeSampler):
    r""" An implementation of typed negative sampling strategy.

    Typed negative sampling considers the entities' type. For example, for the
    positive triplet :math:`(MonaLisa, is\_in, Louvre)`, untyped sampling may
    generate an illogical negative triplet such as
    :math:`(MonaLisa, is\_in, DaVinci)`. So the typed negative sampling
    strategy considers the type of the entity to be corrupted: if we want
    to replace *Louvre*, we only sample the entities which have the same type
    as *Louvre*.

    .. caution::
        When using :py:mod:`TypedStrategy <KGE.ns_strategy.TypedStrategy>`, :code:`metadata` should contains
        key :code:`'ind2type'` to indicate the entities' type when calling
        :py:func:`train() <KGE.models.base_model.BaseModel.KGEModel.train>`.
    """

    def __init__(self, pool, metadata):
        """ Initialize TypedStrategy negative sampler.

        Parameters
        ----------
        pool : :ref:`multiprocessing.pool.Pool <https://docs.python.org/3/library/multiprocessing.html#multiprocessing.pool.Pool>`
            multiprocessing pool for parallel sampling, or :code:`None` to
            sample sequentially in the current process.
        metadata : dict
            metadata that stores the entities' type information.
        """
        self.pool = pool
        self.metadata = metadata

    def __call__(self, X, negative_ratio, side):
        """ Perform negative sampling.

        Parameters
        ----------
        X : tf.Tensor
            positive triplets to be corrupted.
        negative_ratio : int
            number of negative samples, forwarded to
            :code:`ns_with_same_type` for every reference entity.
        side : str
            side to corrupt, can be :code:`'h'` or :code:`'t'`

        Returns
        -------
        tf.Tensor
            sampled entities, the per-entity results concatenated in row
            order and converted to the dtype of :code:`X`.

        Raises
        ------
        ValueError
            if :code:`side` is neither :code:`'h'` nor :code:`'t'`.
        """

        from itertools import repeat

        # Validate before touching X, so that a bad `side` is reported
        # clearly instead of surfacing later as an unbound local variable.
        if side == "h":
            column = 0
        elif side == "t":
            column = 2
        else:
            raise ValueError(f"side must be 'h' or 't', got {side!r}")

        # Entities currently sitting on the side that gets corrupted; each
        # one is the reference whose type the replacement has to share.
        ref_entities = X[:, column].numpy()

        if self.pool is not None:
            # Fan out over the worker pool, one task per reference entity.
            sample_entities = self.pool.starmap(
                ns_with_same_type,
                zip(ref_entities, repeat(self.metadata), repeat(negative_ratio))
            )
        else:
            sample_entities = [
                ns_with_same_type(entity, self.metadata, negative_ratio)
                for entity in ref_entities
            ]

        return tf.constant(np.concatenate(sample_entities), dtype=X.dtype)