Source code for Orange.projection.freeviz


import numpy as np
import scipy.spatial

from Orange.preprocess.preprocess import RemoveNaNRows, Continuize, Scale
from Orange.projection import LinearProjector, DomainProjection

__all__ = ["FreeViz"]


class FreeVizModel(DomainProjection):
    var_prefix = "freeviz"



[docs]
class FreeViz(LinearProjector):
    name = 'FreeViz'
    supports_sparse = False
    preprocessors = [RemoveNaNRows(),
                     Continuize(multinomial_treatment=Continuize.FirstAsBase),
                     Scale(scale=Scale.Span)]
    projection = FreeVizModel

    def __init__(self, weights=None, center=True, scale=True, dim=2, p=1,
                 initial=None, maxiter=500, alpha=0.1, gravity=None,
                 atol=1e-5, preprocessors=None):
        super().__init__(preprocessors=preprocessors)
        self.weights = weights
        self.center = center
        self.scale = scale
        self.dim = dim
        self.p = p
        self.initial = initial
        self.maxiter = maxiter
        self.alpha = alpha
        self.atol = atol
        self.gravity = gravity
        self.is_class_discrete = False
        self.components_ = None

    def __call__(self, data):
        if data is not None:
            self.is_class_discrete = data.domain.class_var.is_discrete
            if len([attr for attr in data.domain.attributes
                    if attr.is_discrete and len(attr.values) > 2]):
                raise ValueError("Can not handle discrete variables"
                                 " with more than two values")
        return super().__call__(data)

    def get_components(self, X, Y):
        return self.freeviz(
            X, Y, weights=self.weights, center=self.center, scale=self.scale,
            dim=self.dim, p=self.p, initial=self.initial,
            maxiter=self.maxiter, alpha=self.alpha, atol=self.atol,
            gravity=self.gravity,
            is_class_discrete=self.is_class_discrete)[1].T

    @classmethod
    def squareform(cls, d):
        """
        Parameters
        ----------
        d : (N * (N - 1) // 2, ) ndarray
            A hollow symmetric square array in condensed form

        Returns
        -------
        D : (N, N) ndarray
            A symmetric square array in redundant form.

        See also
        --------
        scipy.spatial.distance.squareform
        """
        assert d.ndim == 1
        return scipy.spatial.distance.squareform(d, checks=False)

    @classmethod
    def row_v(cls, a):
        """
        Return a view of `a` as a row vector.
        """
        return a.reshape((1, -1))

    @classmethod
    def col_v(cls, a):
        """
        Return a view of `a` as a column vector.
        """
        return a.reshape((-1, 1))

    @classmethod
    def allclose(cls, a, b, rtol=1e-5, atol=1e-8, equal_nan=False):
        # same as np.allclose in numpy==1.10
        return np.all(np.isclose(a, b, rtol, atol, equal_nan=equal_nan))

    @classmethod
    def forces_regression(cls, distances, y, p=1):
        y = np.asarray(y)
        ydist = scipy.spatial.distance.pdist(y.reshape(-1, 1), "sqeuclidean")
        mask = distances > np.finfo(distances.dtype).eps * 100
        F = ydist
        if p == 1:
            F[mask] /= distances[mask]
        else:
            F[mask] /= distances[mask] ** p
        return F

    @classmethod
    def forces_classification(cls, distances, y, p=1, gravity=None):
        diffclass = scipy.spatial.distance.pdist(y.reshape(-1, 1), "hamming") != 0
        # handle attractive force
        if p == 1:
            F = -distances
        else:
            F = -(distances ** p)

        # handle repulsive force
        mask = (diffclass &
                (distances > np.finfo(distances.dtype).eps * 100))
        assert mask.shape == F.shape and mask.dtype == bool
        if p == 1:
            F[mask] = 1 / distances[mask]
        else:
            F[mask] = 1 / (distances[mask] ** p)
        if gravity is not None:
            F[mask] *= -np.sum(F[~mask]) / np.sum(F[mask]) / gravity
        return F

    @classmethod
    def gradient(cls, X, embeddings, forces, embedding_dist=None, weights=None):
        X = np.asarray(X)
        embeddings = np.asarray(embeddings)

        if weights is not None:
            weights = np.asarray(weights)
            if weights.ndim != 1:
                raise ValueError("weights.ndim != 1 ({})".format(weights.ndim))

        N, P = X.shape
        _, dim = embeddings.shape

        if not N == embeddings.shape[0]:
            raise ValueError("X and embeddings must have the same length ({}!={})"
                             .format(X.shape[0], embeddings.shape[0]))

        if weights is not None and X.shape[0] != weights.shape[0]:
            raise ValueError("X.shape[0] != weights.shape[0] ({}!={})"
                             .format(X.shape[0], weights.shape[0]))

        # all pairwise vector differences between embeddings
        embedding_diff = (embeddings[:, np.newaxis, :] -
                          embeddings[np.newaxis, :, :])
        assert embedding_diff.shape == (N, N, dim)
        assert cls.allclose(embedding_diff[0, 1], embeddings[0] - embeddings[1])
        assert cls.allclose(embedding_diff[1, 0], -embedding_diff[0, 1])

        # normalize the direction vectors to unit direction vectors
        if embedding_dist is not None:
            # use supplied precomputed distances
            diff_norm = cls.squareform(embedding_dist)
        else:
            diff_norm = np.linalg.norm(embedding_diff, axis=2)

        mask = diff_norm > np.finfo(diff_norm.dtype).eps * 100
        embedding_diff[mask] /= diff_norm[mask][:, np.newaxis]

        forces = cls.squareform(forces)

        if weights is not None:
            # multiply in the instance weights
            forces *= cls.row_v(weights)
            forces *= cls.col_v(weights)

        # multiply unit direction vectors with the force magnitude
        F = embedding_diff * forces[:, :, np.newaxis]
        assert F.shape == (N, N, dim)
        # sum all the forces acting on a particle
        F = np.sum(F, axis=0)
        assert F.shape == (N, dim)
        # Transfer forces to the 'anchors'
        # (P, dim) array of gradients
        G = X.T.dot(F)
        assert G.shape == (P, dim)
        return G

    @classmethod
    def freeviz_gradient(cls, X, y, embedding, p=1, weights=None,
                         gravity=None, is_class_discrete=False):
        """
        Return the gradient for the FreeViz [1]_ projection.

        Parameters
        ----------
        X : (N, P) ndarray
            The data instance coordinates
        y : (N,) ndarray
            The instance target/class values
        embedding : (N, dim) ndarray
            The current FreeViz point embeddings.
        p : positive number
            The force 'power', e.g. if p=1 (default) the attractive/repulsive
            forces follow linear/inverse linear law, for p=2 the forces follow
            square/inverse square law, ...
        weights : (N, ) ndarray, optional
            Optional vector of sample weights.

        Returns
        -------
        G : (P, dim) ndarray
            The projection gradient.

        .. [1] Janez Demsar, Gregor Leban, Blaz Zupan
               FreeViz - An Intelligent Visualization Approach for Class-Labeled
               Multidimensional Data Sets, Proceedings of IDAMAP 2005, Edinburgh.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        embedding = np.asarray(embedding)
        assert X.ndim == 2 and X.shape[0] == y.shape[0] == embedding.shape[0]
        D = scipy.spatial.distance.pdist(embedding)
        if is_class_discrete:
            forces = cls.forces_classification(D, y, p=p, gravity=gravity)
        else:
            forces = cls.forces_regression(D, y, p=p)
        G = cls.gradient(X, embedding, forces, embedding_dist=D, weights=weights)
        return G

    @classmethod
    def _rotate(cls, A):
        """
        Rotate a 2D projection A so the first axis (row in A) is aligned with
        vector (1, 0).
        """
        assert A.ndim == 2 and A.shape[1] == 2
        phi = np.arctan2(A[0, 1], A[0, 0])
        R = [[np.cos(-phi), np.sin(-phi)],
             [-np.sin(-phi), np.cos(-phi)]]
        return np.dot(A, R)

    @classmethod
    def freeviz(cls, X, y, weights=None, center=True, scale=True, dim=2, p=1,
                initial=None, maxiter=500, alpha=0.1, atol=1e-5, gravity=None,
                is_class_discrete=False):
        """
        FreeViz

        Compute a linear lower dimensional projection to optimize separation
        between classes ([1]_).

        Parameters
        ----------
        X : (N, P) ndarray
            The input data instances
        y : (N, ) ndarray
            The instance class labels
        weights : (N, ) ndarray, optional
            Instance weights
        center : bool or (P,) ndarray
            If `True` then X will have mean subtracted out, if False no
            centering is performed. Alternatively can be a P vector to subtract
            from X.
        scale : bool or (P,) ndarray
            If `True` the X's column will be scaled by 1/SD, if False no scaling
            is performed. Alternatively can be a P vector to divide X by.
        dim : int
            The dimension of the projected points/embedding.
        p : positive number
            The force 'power', e.g. if p=1 (default) the attractive/repulsive
            forces follow linear/inverse linear law, for p=2 the forces follow
            square/inverse square law, ...
        initial : (P, dim) ndarray, optional
            Initial projection matrix
        maxiter : int
            Maximum number of iterations.
        alpha : float
            The step size ('learning rate')
        atol : float
            Terminating numerical tolerance (absolute).

        Returns
        -------
        embeddings : (N, dim) ndarray
            The point projections (`= X.dot(P)`)
        projection : (P, dim)
            The projection matrix.
        center : (P,) ndarray or None
            The translation applied to X (if any).
        scale : (P,) ndarray or None
            The scaling applied to X (if any).

        .. [1] Janez Demsar, Gregor Leban, Blaz Zupan
               FreeViz - An Intelligent Visualization Approach for Class-Labeled
               Multidimensional Data Sets, Proceedings of IDAMAP 2005, Edinburgh.
        """
        needcopy = center is not False or scale is not False
        X = np.array(X, copy=needcopy)
        y = np.asarray(y)
        N, P = X.shape
        _N, = y.shape
        if N != _N:
            raise ValueError("X and y must have the same length")

        if weights is not None:
            weights = np.asarray(weights)

        if isinstance(center, bool):
            if center:
                center = np.mean(X, axis=0)
            else:
                center = None
        else:
            center = np.asarray(center, dtype=X.dtype)
            if center.shape != (P, ):
                raise ValueError("center.shape != (X.shape[1], ) ({} != {})"
                                 .format(center.shape, (X.shape[1], )))

        if isinstance(scale, bool):
            if scale:
                scale = np.std(X, axis=0)
            else:
                scale = None
        else:
            scale = np.asarray(scale, dtype=X.dtype)
            if scale.shape != (P, ):
                raise ValueError("scale.shape != (X.shape[1],) ({} != {}))"
                                 .format(scale.shape, (P, )))

        if initial is not None:
            initial = np.asarray(initial)
            if initial.ndim != 2 or initial.shape != (P, dim):
                raise ValueError
        else:
            initial = cls.init_random(P, dim)
            # initial = np.random.random((P, dim)) * 2 - 1

        # Center/scale X if requested
        if center is not None:
            X -= center

        if scale is not None:
            scalenonzero = np.abs(scale) > np.finfo(scale.dtype).eps
            X[:, scalenonzero] /= scale[scalenonzero]

        A = initial
        embeddings = np.dot(X, A)

        step_i = 0
        while step_i < maxiter:
            G = cls.freeviz_gradient(X, y, embeddings, p=p, weights=weights,
                                     gravity=gravity,
                                     is_class_discrete=is_class_discrete)

            # Scale the changes (the largest anchor move is alpha * radius)
            with np.errstate(divide="ignore"):  # inf's will be ignored by min
                step = np.min(np.linalg.norm(A, axis=1)
                              / np.linalg.norm(G, axis=1))
                if not np.isfinite(step):
                    break
            step = alpha * step
            Anew = A - step * G

            # Center anchors (?? This does not seem right; it changes the
            # projection axes direction somewhat arbitrarily)
            Anew = Anew - np.mean(Anew, axis=0)

            # Scale (so that the largest radius is 1)
            maxr = np.max(np.linalg.norm(Anew, axis=1))
            if maxr >= 0.001:
                Anew /= maxr

            change = np.linalg.norm(Anew - A, axis=1)
            if cls.allclose(change, 0, atol=atol):
                break

            A = Anew
            embeddings = np.dot(X, A)
            step_i = step_i + 1

        if dim == 2:
            A = cls._rotate(A)

        return embeddings, A, center, scale

    @staticmethod
    def init_radial(p):
        """
        Return a 2D projection with a circular anchor placement.
        """
        assert p > 0
        if p == 1:
            axes_angle = [0]
        elif p == 2:
            axes_angle = [0, np.pi / 2]
        else:
            axes_angle = np.linspace(0, 2 * np.pi, p, endpoint=False)

        A = np.c_[np.cos(axes_angle), np.sin(axes_angle)]
        return A

    @staticmethod
    def init_random(p, dim, rstate=None):
        if not isinstance(rstate, np.random.RandomState):
            rstate = np.random.RandomState(rstate if rstate is not None else 0)
        return rstate.rand(p, dim) * 2 - 1
Source code for Orange.projection.freeviz

Orange Data Mining Library

Navigation

Related Topics