# Illustrates how to test for goodness-of-fit using classes in the Extreme.Statistics.Tests namespace in IronPython.
import numerics
from System import Array
from Extreme.Mathematics import *
from Extreme.Statistics import *
from Extreme.Statistics.Distributions import *
from Extreme.Statistics.Random import MersenneTwister
from Extreme.Statistics.Tests import *
# Illustrates the Chi-square, Kolmogorov-Smirnov and Anderson-Darling
# tests for goodness-of-fit.
# This QuickStart Sample illustrates the wide variety of goodness-of-fit
# tests available.
#
# Chi-square Test
#
print "Chi-square test."
# The Chi-square test is the simplest of the goodness-of-fit tests.
# The results follow a binomial distribution with 3 trials (rolls of the dice):
sixesDistribution = BinomialDistribution(3, 1/6.0)
# First, create a histogram with the expected results.
expected = sixesDistribution.GetExpectedHistogram(100)
# And a histogram with the actual results
actual = Histogram(0, 4)
actual.SetTotals(Array[float]([51, 35, 12, 2]))
chiSquare = ChiSquareGoodnessOfFitTest(actual, expected)
chiSquare.SignificanceLevel = 0.01
# We can obtan the value of the test statistic through the Statistic property, # and the corresponding P-value through the Probability property:
print "Test statistic: {0:.4f}".format(chiSquare.Statistic)
print "P-value: {0:.4f}".format(chiSquare.PValue)
# We can now print the test results:
print "Reject null hypothesis?", "yes" if chiSquare.Reject() else "no"
#
# One-sample Kolmogorov-Smirnov Test
#
print "\nOne-sample Kolmogorov-Smirnov Test"
# We will investigate a sample of 25 random numbers from a lognormal distribution
# and investigate how well it matches a similar looking Weibull distribution.
# We first create the two distributions:
logNormal = LognormalDistribution(0, 1)
weibull = WeibullDistribution(2, 1)
# Then we generate the samples from the lognormal distribution:
logNormalData = Vector.Create(25)
logNormal.Sample(MersenneTwister(), logNormalData)
logNormalSample = NumericalVariable(logNormalData)
# Finally, we construct the Kolmogorov-Smirnov test:
ksTest = OneSampleKolmogorovSmirnovTest(logNormalSample, weibull)
# We can obtan the value of the test statistic through the Statistic property, # and the corresponding P-value through the Probability property:
print "Test statistic: {0:.4f}".format(ksTest.Statistic)
print "P-value: {0:.4f}".format(ksTest.PValue)
# We can now print the test results:
print "Reject null hypothesis?", "yes" if ksTest.Reject() else "no"
#
# Two-sample Kolmogorov-Smirnov Test
#
print "\nTwo-sample Kolmogorov-Smirnov Test"
# We once again investigate the similarity between a lognormal and
# a Weibull distribution. However, this time, we use 25 random
# samples from each distribution.
# We already have the lognormal samples.
# Generate the samples from the Weibull distribution:
weibullData = Vector.Create(25)
weibull.Sample(MersenneTwister(), weibullData)
weibullSample = NumericalVariable(weibullData)
# Finally, we construct the Kolmogorov-Smirnov test:
ksTest2 = TwoSampleKolmogorovSmirnovTest(logNormalSample, weibullSample)
# We can obtan the value of the test statistic through the Statistic property, # and the corresponding P-value through the Probability property:
print "Test statistic: {0:.4f}".format(ksTest2.Statistic)
print "P-value: {0:.4f}".format(ksTest2.PValue)
# We can now print the test results:
print "Reject null hypothesis?", "yes" if ksTest2.Reject() else "no"
#
# Anderson-Darling Test
#
print "\nAnderson-Darling Test"
# The Anderson-Darling is defined for a small number of
# distributions. Currently, only the normal distribution
# is supported.
# We will investigate the distribution of the strength
# of polished airplane windows. The data comes from
# Fuller, e.al. (NIST, 1993) and represents the pressure
# (in psi).
# First, create a numerical variable:
strength = NumericalVariable(Vector([ \
18.830, 20.800, 21.657, 23.030, 23.230, 24.050, 24.321, \
25.500, 25.520, 25.800, 26.690, 26.770, 26.780, 27.050, \
27.670, 29.900, 31.110, 33.200, 33.730, 33.760, 33.890, \
34.760, 35.750, 35.910, 36.980, 37.080, 37.090, 39.580, \
44.045, 45.290, 45.381]))
# Let's print some summary statistics:
print "Number of observations:", strength.Length
print "Mean: {0:.3f}".format(strength.Mean)
print "Standard deviation: {0:.3f}".format(strength.StandardDeviation)
# The most refined test of normality is the Anderson-Darling test.
adTest = AndersonDarlingTest(strength)
# We can obtan the value of the test statistic through the Statistic property, # and the corresponding P-value through the Probability property:
print "Test statistic: {0:.4f}".format(adTest.Statistic)
print "P-value: {0:.4f}".format(adTest.PValue)
# We can now print the test results:
print "Reject null hypothesis?", "yes" if adTest.Reject() else "no"