[docs]defcorrect_p_values(pvalues,method='BH'):"""Corrects p-values for multiple testing using various methods. Arguments --------- pvalues : array List of p values to be corrected. method : str Optional method to use: 'BH' = 'FDR' = 'Benjamini-Hochberg', 'B' = 'FWER' = 'Bonferoni'. Returns ------- qvalues : array Corrected p values. References ---------- - `Benjamini Hochberg, 1995 <http://www.jstor.org/stable/2346101?seq=1#page_scan_tab_contents>`_ - `Bonferoni correction <http://www.tandfonline.com/doi/abs/10.1080/01621459.1961.10482090#.VmHWUHbH6KE>`_ - `R statistics package <https://www.r-project.org/>`_ Notes ----- Modified from http://statsmodels.sourceforge.net/ipdirective/generated/scikits.statsmodels.sandbox.stats.multicomp.multipletests.html. """pvals=numpy.asarray(pvalues);ifmethod.lower()in['bh','fdr']:pvals_sorted_ids=numpy.argsort(pvals);pvals_sorted=pvals[pvals_sorted_ids]sorted_ids_inv=pvals_sorted_ids.argsort()n=len(pvals);bhfactor=numpy.arange(1,n+1)/float(n);pvals_corrected_raw=pvals_sorted/bhfactor;pvals_corrected=numpy.minimum.accumulate(pvals_corrected_raw[::-1])[::-1]pvals_corrected[pvals_corrected>1]=1;returnpvals_corrected[sorted_ids_inv];elifmethod.lower()in['b','fwer']:n=len(pvals);pvals_corrected=n*pvals;pvals_corrected[pvals_corrected>1]=1;\
returnpvals_corrected;
#return reject[pvals_sortind.argsort()]
[docs]defestimate_q_values(pvalues,m=None,pi0=None,verbose=False,low_memory=False):"""Estimates q-values from p-values. Arguments --------- pvalues : array List of p-values. m : int or None Number of tests. If None, m = pvalues.size pi0 : float or None Estimate of m_0 / m which is the (true null / total tests) ratio. If None estimation via cubic spline. verbose : bool Print info during execution low_memory : bool If true, use low memory version. Returns ------- qvalues : array The q values. Notes ----- - The q-value of a particular feature can be described as the expected proportion of false positives among all features as or more extreme than the observed one. - The estimated q-values are increasing in the same order as the p-values. References ---------- - `Storey and Tibshirani, 2003 <http://www.pnas.org/content/100/16/9440.full>`_ - modified from https://github.com/nfusi/qvalue """ifnot(pvalues.min()>=0andpvalues.max()<=1):raiseRuntimeError("estimateQValues: p-values should be between 0 and 1");original_shape=pvalues.shapepvalues=pvalues.ravel()# flattens the array in place, more efficient than flatten() ifm==None:m=float(len(pvalues))else:# the user has supplied an mm*=1.0# if the number of hypotheses is small, just set pi0 to 1iflen(pvalues)<100andpi0==None:pi0=1.0elifpi0!=None:pi0=pi0else:# evaluate pi0 for different lambdaspi0=[]lam=scipy.arange(0,0.90,0.01)counts=scipy.array([(pvalues>i).sum()foriinlam])forlinrange(len(lam)):pi0.append(counts[l]/(m*(1-lam[l])))pi0=scipy.array(pi0)# fit natural cubic scipylinetck=scipy.interpolate.splrep(lam,pi0,k=3)pi0=scipy.interpolate.splev(lam[-1],tck)ifpi0>1:ifverbose:raiseWarning("estimateQValues: got pi0 > 1 (%.3f) while estimating qvalues, setting it to 1"%pi0);pi0=1.0ifnot(pi0>=0andpi0<=1):raiseRuntimeError("estimateQValues: pi0 is not between 0 and 1: %f"%pi0);iflow_memory:# low memory version, only uses 1 pvalues and 1 qv matricesqv=scipy.zeros((len(pvalues),))last_pvalues=pvalues.argmax()qv[last_pvalues]=(pi0*pvalues[last_pvalues]*m)/float(m)pvalues[last_pvalues]=-scipy.infprev_qv=last_pvaluesforiinrange(int(len(pvalues))-2,-1,-1):cur_max=pvalues.argmax()qv_i=(pi0*m*pvalues[cur_max]/float(i+1))pvalues[cur_max]=-scipy.infqv_i1=prev_qvqv[cur_max]=min(qv_i,qv_i1)prev_qv=qv[cur_max]else:p_ordered=scipy.argsort(pvalues)pvalues=pvalues[p_ordered]qv=pi0*m/len(pvalues)*pvaluesqv[-1]=min(qv[-1],1.0)foriinrange(len(pvalues)-2,-1,-1):qv[i]=min(pi0*m*pvalues[i]/(i+1.0),qv[i+1])# reorder qvaluesqv_temp=qv.copy()qv=scipy.zeros_like(qv)qv[p_ordered]=qv_temp# reshape qvaluesqv=qv.reshape(original_shape)returnqv