# Source code for ClearMap.Analysis.Statistics.StatisticalTests
# -*- coding: utf-8 -*-
"""
statisticalTests
================
Collection of some statistical tests
"""
__author__ = 'Christoph Kirst <christoph.kirst.ck@gmail.com>'
__license__ = 'GPLv3 - GNU General Pulic License v3 (see LICENSE.txt)'
__copyright__ = 'Copyright © 2020 by Christoph Kirst'
__webpage__ = 'http://idisco.info'
__download__ = 'http://www.github.com/ChristophKirst/ClearMap2'
import sys
import numpy as np
from scipy.stats import rankdata
from scipy.stats import distributions
# [docs]
def test_cramer_von_mises_2_sample(x, y):
    """
    Computes the Cramer von Mises two sample test.

    Arguments
    ---------
    x, y : array_like
        Two arrays of sample observations assumed to be drawn from a continuous
        distribution, sample sizes can be different. Inputs are flattened
        before ranking and each must contain at least one observation.

    Returns
    -------
    T : float
        The Cramer von Mises T statistic (Anderson, Eqn 9).
    p_value : float
        Two-tailed p-value, obtained from a normal approximation of T using
        its expected value and variance under the null hypothesis.

    Raises
    ------
    ValueError
        If either sample is empty.

    Notes
    -----
    This is a two-sided test for the null hypothesis that 2 independent samples
    are drawn from the same continuous distribution.

    References
    ----------
    * modified from https://github.com/scipy/scipy/pull/3659
    """
    # following notation of Anderson et al. doi:10.1214/aoms/1177704477
    # Flatten so that the sample sizes agree with rankdata, which ranks the
    # flattened data.
    x = np.ravel(x)
    y = np.ravel(y)
    N = x.size
    M = y.size

    # An empty sample makes the statistic undefined (division by N * M);
    # fail early with a clear message instead of a ZeroDivisionError.
    if N == 0 or M == 0:
        raise ValueError('Both samples must contain at least one observation, '
                         'got sizes %d and %d!' % (N, M))

    # ranks of the observations within the pooled sample
    allranks = rankdata(np.concatenate((x, y)))
    ri = allranks[:N]
    sj = allranks[N:]

    # ranks of the observations within their own sample
    i = rankdata(x)
    j = rankdata(y)

    # Anderson et al. Eqn 10
    U = N * np.sum((ri - i)**2) + M * np.sum((sj - j)**2)

    # Anderson et al. Eqn 9
    T = U / (N * M * (N + M)) - (4 * M * N - 1) / (6 * (M + N))

    # expected value and variance of T, used for the normal approximation
    Texpected = 1./6 + 1./(6 * (M + N))
    Tvariance = 1./45 * (M + N + 1)/(M + N)**2 * (4 * M * N * (M+N) - 3*(M**2 + N**2) - 2*M*N)/(4 * M * N)
    zscore = np.abs(T - Texpected) / np.sqrt(Tvariance)

    return T, 2 * distributions.norm.sf(zscore)


# The name starts with 'test_' but this is a library function, not a unit
# test; keep pytest from collecting it when it is imported into a test module.
test_cramer_von_mises_2_sample.__test__ = False