Source code for ClearMap.Analysis.Statistics.MultipleComparisonCorrection

# -*- coding: utf-8 -*-
"""
MultipleComparisonCorrection
============================

Correction methods for multiple comparison tests.
"""
__author__    = 'Christoph Kirst <christoph.kirst.ck@gmail.com>'
__license__   = 'GPLv3 - GNU General Pulic License v3 (see LICENSE.txt)'
__copyright__ = 'Copyright © 2020 by Christoph Kirst'
__webpage__   = 'http://idisco.info'
__download__  = 'http://www.github.com/ChristophKirst/ClearMap2'


import numpy
import scipy
import scipy.interpolate
   

###############################################################################
### Bnejamini Hochberg correction
############################################################################### 
    
[docs] def correct_p_values(pvalues, method = 'BH'): """Corrects p-values for multiple testing using various methods. Arguments --------- pvalues : array List of p values to be corrected. method : str Optional method to use: 'BH' = 'FDR' = 'Benjamini-Hochberg', 'B' = 'FWER' = 'Bonferoni'. Returns ------- qvalues : array Corrected p values. References ---------- - `Benjamini Hochberg, 1995 <http://www.jstor.org/stable/2346101?seq=1#page_scan_tab_contents>`_ - `Bonferoni correction <http://www.tandfonline.com/doi/abs/10.1080/01621459.1961.10482090#.VmHWUHbH6KE>`_ - `R statistics package <https://www.r-project.org/>`_ Notes ----- Modified from http://statsmodels.sourceforge.net/ipdirective/generated/scikits.statsmodels.sandbox.stats.multicomp.multipletests.html. """ pvals = numpy.asarray(pvalues); if method.lower() in ['bh', 'fdr']: pvals_sorted_ids = numpy.argsort(pvals); pvals_sorted = pvals[pvals_sorted_ids] sorted_ids_inv = pvals_sorted_ids.argsort() n = len(pvals); bhfactor = numpy.arange(1,n+1)/float(n); pvals_corrected_raw = pvals_sorted / bhfactor; pvals_corrected = numpy.minimum.accumulate(pvals_corrected_raw[::-1])[::-1] pvals_corrected[pvals_corrected>1] = 1; return pvals_corrected[sorted_ids_inv]; elif method.lower() in ['b', 'fwer']: n = len(pvals); pvals_corrected = n * pvals; pvals_corrected[pvals_corrected>1] = 1;\ return pvals_corrected;
#return reject[pvals_sortind.argsort()]
[docs] def estimate_q_values(pvalues, m = None, pi0 = None, verbose = False, low_memory = False): """Estimates q-values from p-values. Arguments --------- pvalues : array List of p-values. m : int or None Number of tests. If None, m = pvalues.size pi0 : float or None Estimate of m_0 / m which is the (true null / total tests) ratio. If None estimation via cubic spline. verbose : bool Print info during execution low_memory : bool If true, use low memory version. Returns ------- qvalues : array The q values. Notes ----- - The q-value of a particular feature can be described as the expected proportion of false positives among all features as or more extreme than the observed one. - The estimated q-values are increasing in the same order as the p-values. References ---------- - `Storey and Tibshirani, 2003 <http://www.pnas.org/content/100/16/9440.full>`_ - modified from https://github.com/nfusi/qvalue """ if not (pvalues.min() >= 0 and pvalues.max() <= 1): raise RuntimeError("estimateQValues: p-values should be between 0 and 1"); original_shape = pvalues.shape pvalues = pvalues.ravel() # flattens the array in place, more efficient than flatten() if m == None: m = float(len(pvalues)) else: # the user has supplied an m m *= 1.0 # if the number of hypotheses is small, just set pi0 to 1 if len(pvalues) < 100 and pi0 == None: pi0 = 1.0 elif pi0 != None: pi0 = pi0 else: # evaluate pi0 for different lambdas pi0 = [] lam = scipy.arange(0, 0.90, 0.01) counts = scipy.array([(pvalues > i).sum() for i in lam]) for l in range(len(lam)): pi0.append(counts[l]/(m*(1-lam[l]))) pi0 = scipy.array(pi0) # fit natural cubic scipyline tck = scipy.interpolate.splrep(lam, pi0, k = 3) pi0 = scipy.interpolate.splev(lam[-1], tck) if pi0 > 1: if verbose: raise Warning("estimateQValues: got pi0 > 1 (%.3f) while estimating qvalues, setting it to 1" % pi0); pi0 = 1.0 if not (pi0 >= 0 and pi0 <= 1): raise RuntimeError("estimateQValues: pi0 is not between 0 and 1: %f" % pi0); if low_memory: # low memory version, only uses 1 pvalues and 1 qv matrices qv = scipy.zeros((len(pvalues),)) last_pvalues = pvalues.argmax() qv[last_pvalues] = (pi0*pvalues[last_pvalues]*m)/float(m) pvalues[last_pvalues] = -scipy.inf prev_qv = last_pvalues for i in range(int(len(pvalues))-2, -1, -1): cur_max = pvalues.argmax() qv_i = (pi0*m*pvalues[cur_max]/float(i+1)) pvalues[cur_max] = -scipy.inf qv_i1 = prev_qv qv[cur_max] = min(qv_i, qv_i1) prev_qv = qv[cur_max] else: p_ordered = scipy.argsort(pvalues) pvalues = pvalues[p_ordered] qv = pi0 * m/len(pvalues) * pvalues qv[-1] = min(qv[-1],1.0) for i in range(len(pvalues)-2, -1, -1): qv[i] = min(pi0*m*pvalues[i]/(i+1.0), qv[i+1]) # reorder qvalues qv_temp = qv.copy() qv = scipy.zeros_like(qv) qv[p_ordered] = qv_temp # reshape qvalues qv = qv.reshape(original_shape) return qv
############################################################################### ### Tests ############################################################################### def _test(): import numpy as np import ClearMap.Analysis.Statistics.MultipleComparisonCorrection as mcc