Data Analysis Mathematics Linear Algebra Statistics
New Version 7.0!  QuickStart Samples

# Goodness-Of-Fit Tests QuickStart Sample (IronPython)

Illustrates how to test for goodness-of-fit using classes in the Extreme.Statistics.Tests namespace in IronPython.

```import numerics

from System import Array

from Extreme.Mathematics import *

from Extreme.Statistics import *
from Extreme.Statistics.Distributions import *
from Extreme.Statistics.Random import MersenneTwister
from Extreme.Statistics.Tests import *

#/ Illustrates the Chi Square, Kolmogorov-Smirnov and Anderson-Darling
#/ tests for goodness-of-fit.

# This QuickStart Sample illustrates the wide variety of goodness-of-fit
# tests available.

#
# Chi-square Test
#

print "Chi-square test."

# The Chi-square test is the simplest of the goodness-of-fit tests.
# The results follow a binomial distribution with 3 trials (rolls of the dice):
sixesDistribution = BinomialDistribution(3, 1/6.0)

# First, create a histogram with the expected results.
expected = sixesDistribution.GetExpectedHistogram(100)

# And a histogram with the actual results
actual = Histogram(0, 4)
actual.SetTotals(Array[float]([51, 35, 12, 2]))
chiSquare = ChiSquareGoodnessOfFitTest(actual, expected)
chiSquare.SignificanceLevel = 0.01

# We can obtan the value of the test statistic through the Statistic property, # and the corresponding P-value through the Probability property:
print "Test statistic: {0:.4f}".format(chiSquare.Statistic)
print "P-value:        {0:.4f}".format(chiSquare.PValue)

# We can now print the test results:
print "Reject null hypothesis?", "yes" if chiSquare.Reject() else "no"

#
# One-sample Kolmogorov-Smirnov Test
#

print "\nOne-sample Kolmogorov-Smirnov Test"

# We will investigate a sample of 25 random numbers from a lognormal distribution
# and investigate how well it matches a similar looking Weibull distribution.

# We first create the two distributions:
logNormal = LognormalDistribution(0, 1)
weibull = WeibullDistribution(2, 1)

# Then we generate the samples from the lognormal distribution:
logNormalData = Vector.Create(25)
logNormal.Sample(MersenneTwister(), logNormalData)
logNormalSample = NumericalVariable(logNormalData)

# Finally, we construct the Kolmogorov-Smirnov test:
ksTest = OneSampleKolmogorovSmirnovTest(logNormalSample, weibull)

# We can obtan the value of the test statistic through the Statistic property, # and the corresponding P-value through the Probability property:
print "Test statistic: {0:.4f}".format(ksTest.Statistic)
print "P-value:        {0:.4f}".format(ksTest.PValue)

# We can now print the test results:
print "Reject null hypothesis?", "yes" if ksTest.Reject() else "no"

#
# Two-sample Kolmogorov-Smirnov Test
#

print "\nTwo-sample Kolmogorov-Smirnov Test"

# We once again investigate the similarity between a lognormal and
# a Weibull distribution. However, this time, we use 25 random
# samples from each distribution.

# We already have the lognormal samples.
# Generate the samples from the Weibull distribution:
weibullData = Vector.Create(25)
weibull.Sample(MersenneTwister(), weibullData)
weibullSample = NumericalVariable(weibullData)

# Finally, we construct the Kolmogorov-Smirnov test:
ksTest2 = TwoSampleKolmogorovSmirnovTest(logNormalSample, weibullSample)

# We can obtan the value of the test statistic through the Statistic property, # and the corresponding P-value through the Probability property:
print "Test statistic: {0:.4f}".format(ksTest2.Statistic)
print "P-value:        {0:.4f}".format(ksTest2.PValue)

# We can now print the test results:
print "Reject null hypothesis?", "yes" if ksTest2.Reject() else "no"

#
# Anderson-Darling Test
#

print "\nAnderson-Darling Test"

# The Anderson-Darling is defined for a small number of
# distributions. Currently, only the normal distribution
# is supported.

# We will investigate the distribution of the strength
# of polished airplane windows. The data comes from
# Fuller, e.al. (NIST, 1993) and represents the pressure
# (in psi).

# First, create a numerical variable:
strength = NumericalVariable(Vector([ \
18.830, 20.800, 21.657, 23.030, 23.230, 24.050, 24.321, \
25.500, 25.520, 25.800, 26.690, 26.770, 26.780, 27.050, \
27.670, 29.900, 31.110, 33.200, 33.730, 33.760, 33.890, \
34.760, 35.750, 35.910, 36.980, 37.080, 37.090, 39.580, \
44.045, 45.290, 45.381]))

# Let's print some summary statistics:
print "Number of observations:", strength.Length
print "Mean:                   {0:.3f}".format(strength.Mean)
print "Standard deviation:     {0:.3f}".format(strength.StandardDeviation)

# The most refined test of normality is the Anderson-Darling test.