Multiple Linear Regression in IronPython QuickStart Sample

Illustrates how to use the LinearRegressionModel class to perform a multiple linear regression in IronPython.

View this sample in: C#, Visual Basic, F#

```Python
import numerics

from System import Array, Char

from Extreme.Mathematics import *
from Extreme.Statistics import *

# Illustrates building multiple linear regression models using 
# the LinearRegressionModel class in the 
# Extreme.Statistics namespace of the Extreme
# Optimization Numerical Libraries for .NET.

# Multiple linear regression can be performed using 
# the LinearRegressionModel class.
#
# This QuickStart sample uses old economic data about 50 countries
# from Belsley, Kuh and Welsch. The fields are as follows:
#   DispInc: Per capita disposable income.
#   Growth:  Percent rate of change of DispInc.
#   Pop15:   Percentage of population under 15.
#   Pop75:   Percentage of population over 75.
#   Savings: Aggregate savings divided by disposable income.
#
# We want to investigate the effect of the first four variables
# on the savings ratio.

# First, read the data from a file into an ADO.NET DataTable. 
# For the sake of clarity, we put this code in its own function,
# which reads the data from a text file into a DataTable.

import clr
clr.AddReference("System.Data")
from System.Data import *
from System.IO import *

def ReadData():
    """Read the savings data file into a DataTable named "savings".

    The first line of the file is a header holding the names of the
    numeric fields, separated by whitespace. Every following line holds
    a text key followed by one numeric value per header field. Reading
    stops at the end of the file or at the first blank line.

    Returns:
        A DataTable with a string "Key" column followed by one
        floating-point column per header field.
    """
    data = DataTable("savings")
    data.Columns.Add("Key", str)

    sr = StreamReader(r"..\Data\savings.dat")
    try:
        # Read the header and extract the field names. split() without
        # arguments ignores leading, trailing and repeated whitespace, so
        # a trailing blank on the line cannot run the scan past its end.
        header = sr.ReadLine()
        for name in header.split():
            data.Columns.Add(name, float)

        # Now read the data and add them to the table.
        # Assumes all columns except the first are numerical.
        columnCount = data.Columns.Count
        line = sr.ReadLine()
        while line is not None and line.strip():
            # Use a fresh buffer for every row so that a line with fewer
            # fields than columns does not inherit the previous row's values.
            rowData = Array.CreateInstance(object, columnCount)
            # Fields beyond the last column are ignored.
            for column, field in enumerate(line.split()[:columnCount]):
                if column == 0:
                    rowData[column] = field
                else:
                    rowData[column] = float.Parse(field)
            data.Rows.Add(rowData)
            line = sr.ReadLine()
    finally:
        # Release the file handle even when parsing fails.
        sr.Close()
    return data

# Load the raw table, then create a VariableCollection from it.
dataTable = ReadData()
data = VariableCollection(dataTable)

# Build the regression model. The arguments are the VariableCollection
# containing all variables, the name of the dependent variable, and a
# string array with the names of the independent variables.
independentNames = Array[str](["Pop15", "Pop75", "DispInc", "Growth"])
model = LinearRegressionModel(data, "Savings", independentNames)

# Model options can be set at this point, such as whether to include
# a constant term.
model.NoIntercept = False

# Compute() performs the actual regression analysis.
model.Compute()

# The Parameters collection contains information about the regression
# parameters. One line is printed per parameter, using these properties:
#   Name          - usually the name of the variable
#   Value         - estimated value of the parameter
#   StandardError - standard error of the estimate
#   Statistic     - t statistic for the hypothesis that the parameter is zero
#   PValue        - probability corresponding to the t statistic
print("Variable              Value    Std.Error  t-stat  p-Value")
rowFormat = "{0:20}{1:10.5f}{2:10.5f}{3:8.2f} {4:7.4f}"
for parameter in model.Parameters:
    print(rowFormat.format(
        parameter.Name,
        parameter.Value,
        parameter.StandardError,
        parameter.Statistic,
        parameter.PValue))
print("")

# In addition to these properties, Parameter objects have a GetConfidenceInterval
# method that returns a confidence interval at a specified confidence level.
# Notice that individual parameters can be accessed using their numeric index.
# Parameter 0 is the intercept, if it was included.
confidenceInterval = model.Parameters[0].GetConfidenceInterval(0.95)
print("95% confidence interval for constant:{0:.4f} - {1:.4f}".format(
    confidenceInterval.LowerBound, confidenceInterval.UpperBound))

# Parameters can also be accessed by name. The label printed below names
# the same variable that is looked up here.
confidenceInterval = model.Parameters["DispInc"].GetConfidenceInterval(0.95)
print("95% confidence interval for DispInc: {0:.4f} - {1:.4f}".format(
    confidenceInterval.LowerBound, confidenceInterval.UpperBound))
print("")

# There is also a wealth of information about the analysis available
# through various properties of the LinearRegressionModel object:
print("Residual standard error: {0:.3f}".format(model.StandardError))
print("R-Squared:               {0:.4f}".format(model.RSquared))
print("Adjusted R-Squared:      {0:.4f}".format(model.AdjustedRSquared))
print("F-statistic:             {0:.4f}".format(model.FStatistic))
# Python's str.format expects ".5f" for five decimals; the .NET-style
# "F5" specifier is not a valid Python format specification.
print("Corresponding p-value:   {0:.5f}".format(model.PValue))
print("")

# Much of this data can be summarized in the form of an ANOVA table:
print(model.AnovaTable.ToString())

```