# Source code for Orange.data.storage

class Storage:
    """Base interface for data storages.

    The class supplies default density reports (always ``DENSE``) and an
    ``approx_len`` that falls back to ``len(self)``. Every filtering and
    statistics hook raises :exc:`NotImplementedError` here, so a concrete
    storage has to override the ones it supports.
    """

    # Default is None. NOTE(review): presumably replaced by the data domain
    # in concrete storages -- confirm against a subclass.
    domain = None

    # Default is the empty string.
    name = ""

    # Density codes (0..3) of the kind returned by the ``*_density`` methods.
    MISSING, DENSE, SPARSE, SPARSE_BOOL = range(4)

    def approx_len(self):
        """Return the (approximate) number of rows.

        The base implementation simply returns ``len(self)``. No ``__len__``
        is defined in the visible part of this class, so a subclass must
        provide one (or override this method).
        """
        return len(self)

    def X_density(self):
        """Return the density code for ``X``; always ``Storage.DENSE`` here."""
        return Storage.DENSE

    def Y_density(self):
        """Return the density code for ``Y``; always ``Storage.DENSE`` here."""
        return Storage.DENSE

    def metas_density(self):
        """Return the density code for metas; always ``Storage.DENSE`` here."""
        return Storage.DENSE

    def _filter_is_defined(self, columns=None, negate=False):
        """Filter hook; not implemented in the base class.

        NOTE(review): judging by the name, this selects rows by whether the
        values in ``columns`` are defined, with ``negate`` inverting the
        selection -- confirm against a concrete storage.

        :raises NotImplementedError: always
        """
        raise NotImplementedError

    def _filter_has_class(self, negate=False):
        """Filter hook; not implemented in the base class.

        NOTE(review): presumably selects rows by whether the class value is
        present -- confirm against a concrete storage.

        :raises NotImplementedError: always
        """
        raise NotImplementedError

    def _filter_random(self, prob, negate=False):
        """Filter hook; not implemented in the base class.

        NOTE(review): presumably a random selection of rows governed by
        ``prob`` -- confirm against a concrete storage.

        :raises NotImplementedError: always
        """
        raise NotImplementedError

    def _filter_same_value(self, column, value, negate=False):
        """Filter hook; not implemented in the base class.

        NOTE(review): presumably selects rows whose ``column`` equals
        ``value`` -- confirm against a concrete storage.

        :raises NotImplementedError: always
        """
        raise NotImplementedError

    def _filter_values(self, filter):
        """Filter hook; not implemented in the base class.

        The parameter name ``filter`` shadows the builtin; it is kept because
        it is part of the signature that callers and subclasses see.

        :raises NotImplementedError: always
        """
        raise NotImplementedError

    def _compute_basic_stats(self, columns=None):
        """Compute basic stats for each of the columns.

        :param columns: columns to calculate stats for. None = all of them
        :return: tuple(min, max, mean, 0, #nans, #non-nans)
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError

    def _compute_distributions(self, columns=None):
        """Compute distribution of values for the given columns.

        :param columns: columns to calculate distributions for
        :return: a list of distributions. Type of distribution depends on the
                 type of the column:
                   - for discrete, distribution is a 1d np.array containing the
                     occurrence counts for each of the values.
                   - for continuous, distribution is a 2d np.array with
                     distinct (ordered) values of the variable in the first row
                     and their counts in second.
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError

    def _compute_contingency(self, col_vars=None, row_var=None):
        """
        Compute contingency matrices for one or more discrete or
        continuous variables against the specified discrete variable.

        The resulting list contains a pair for each column variable.
        The first element contains the contingencies and the second
        element gives the distribution of the row variables for instances
        in which the value of the column variable is missing.

        The format of contingencies returned depends on the variable type:

        - for discrete variables, it is a numpy array, where
          element (i, j) contains count of rows with i-th value of the
          row variable and j-th value of the column variable.

        - for continuous variables, contingency is a list of two arrays,
          where the first array contains ordered distinct values of the
          column_variable and the element (i,j) of the second array
          contains count of rows with i-th value of the row variable
          and j-th value of the ordered column variable.

        :param col_vars: variables whose values will correspond to columns of
            contingency matrices
        :type col_vars: list of ints, variable names or descriptors of type
            :obj:`Orange.data.Variable`

        :param row_var: a discrete variable whose values will correspond to the
            rows of contingency matrices
        :type row_var: int, variable name or :obj:`Orange.data.DiscreteVariable`

        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError