New Version 5.0!

Try it for free with our fully functional 60-day trial version.

Download now!

QuickStart Samples

Generalized Linear Models QuickStart Sample (F#)

Illustrates how to use the GeneralizedLinearModel class to compute probit, Poisson and similar regression models in F#.

C# code Visual Basic code Back to QuickStart Samples

#light

open System

open System.Data
open System.IO

open Extreme.Mathematics
open Extreme.Mathematics.LinearAlgebra.IO
open Extreme.Statistics

// Illustrates building generalized linear models using 
// the GeneralizedLinearModel class in the 
// Extreme.Statistics namespace of the Extreme
// Optimization Numerical Libraries for .NET.

// Generalized linear models can be computed using 
// the GeneralizedLinearModel class.

//
// Poisson regression
//

// This QuickStart sample uses data about the attendance of 316 students
// from two urban high schools. The fields are as follows:
//   daysabs: The number of days the student was absent.
//   male:    A binary indicator of gender.
//   math:    The student's standardized math score.
//   langarts:The student's standardized language arts score.
//
// We want to investigate the relationship between these variables.
// 
// See http://www.ats.ucla.edu/stat/stata/dae/poissonreg.htm

// First, read the data from a file into a VariableCollection.
// Reads the data from a text file into a <see cref="VariableCollection"/>.
let ReadAttendanceData unit =
    use reader = new DelimitedTextMatrixReader(@"..\..\..\..\data\PoissonReg.csv")
    reader.StartRow <- 1
    reader.SetColumnDelimiters([| ',' |])
    reader.SetRowDelimiters([| '\r'; '\n' |])
    reader.MergeConsecutiveDelimiters <- false
    let m = reader.ReadMatrix()
    let col = VariableCollection()
    col.Add(new NumericalVariable("id", m.GetColumn(0))) |> ignore
    col.Add(new NumericalVariable("school", m.GetColumn(1))) |> ignore
    col.Add(new NumericalVariable("male", m.GetColumn(2))) |> ignore
    col.Add(new NumericalVariable("math", m.GetColumn(3))) |> ignore
    col.Add(new NumericalVariable("langarts", m.GetColumn(4))) |> ignore
    col.Add(new NumericalVariable("daysatt", m.GetColumn(5))) |> ignore
    col.Add(new NumericalVariable("daysabs", m.GetColumn(6))) |> ignore
    col

let data = ReadAttendanceData()

// Now create the regression model. Parameters are the name 
// of the dependent variable, a let array containing 
// the names of the independent variables, and the VariableCollection
// containing all variables.
let model = GeneralizedLinearModel(data, "daysabs", [| "math"; "langarts"; "male" |])

// The ModelFamily specifies the distribution of the dependent variable.
// Since we're dealing with count data, we use a Poisson model:
model.ModelFamily <- ModelFamily.Poisson

// The LinkFunction specifies the relationship between the dependent variable
// and the linear predictor of independent variables. In this case,
// we use the canonical link function, which is the default.
model.LinkFunction <- ModelFamily.Poisson.CanonicalLinkFunction

// The Compute method performs the actual regression analysis.
model.Compute()

// The Parameters collection contains information about the regression 
// parameters.
printfn "Variable  Value    Std.Error    z     p-Value"
for parameter in model.Parameters do
    // Parameter objects have the following properties:
    printfn "%-8s%10.6f%10.6f%8.2f %7.5f"
        // Name, usually the name of the variable:
        parameter.Name
        // Estimated value of the parameter:
        parameter.Value
        // Standard error:
        parameter.StandardError
        // The value of the z score for the hypothesis that the parameter
        // is zero.
        parameter.Statistic
        // Probability corresponding to the t statistic.
        parameter.PValue
printfn ""

// In addition to these properties, Parameter objects have a GetConfidenceInterval
// method that returns a confidence interval at a specified confidence level.
// Notice that individual parameters can be accessed using their numeric index.
// Parameter 0 is the intercept, if it was included.
let confidenceInterval = model.Parameters.[0].GetConfidenceInterval(0.95)
printfn "95%% confidence interval for constant term: %.4f - %.4f"
    confidenceInterval.LowerBound confidenceInterval.UpperBound

// Parameters can also be accessed by name:
let confidenceInterval2 = model.Parameters.["math"].GetConfidenceInterval(0.95)
printfn "95%% confidence interval for math score: %.4f - %.4f"
    confidenceInterval2.LowerBound confidenceInterval2.UpperBound
printfn ""

// There is also a wealth of information about the analysis available
// through various properties of the GeneralizedLinearModel object:
printfn "Log likelihood:         %.4f" (model.GetLogLikelihood())
printfn "Kernel log likelihood:  %.4f" (model.GetKernelLogLikelihood())

// Note that some statistical applications (notably stata) use 
// a different definition of some of these "information criteria":
printfn "\"Information Criteria\""
printfn "Akaike (AIC):           %.3f" (model.GetAkaikeInformationCriterion())
printfn "Corrected AIC:          %.3f" (model.GetCorrectedAkaikeInformationCriterion())
printfn "Bayesian (BIC):         %.3f" (model.GetBayesianInformationCriterion())
printfn "Chi Square: %.3f" (model.GetChiSquare())
printfn ""

//
// Probit regression
// 

// In a second example, we investigate the relationship between whether a student
// graduates, and the student's GRE scores,grade point averages, the level 
// of the school from a "top notch" school. The fields are as follows:
//   admit:    Dependent variable
//   gre:      The student's GRE score.
//   topnotch: A binary indicator of the type of school
//   gpa:      The student's Grade Point Average.
// 
// The data was generated.
// See http://www.ats.ucla.edu/stat/stata/dae/probit.htm

// First, read the data from a file into a VariableCollection.
let ReadGraduateData unit =
    use reader = new FixedWidthMatrixReader(@"..\..\..\..\data\probit.dat")
    reader.StartRow <- 1
    reader.SetColumnBreaks(9, 18, 27)
    let m = reader.ReadMatrix()
    let col = VariableCollection()
    ignore(col.Add(new NumericalVariable("admit", m.GetColumn(0))))
    ignore(col.Add(new NumericalVariable("gre", m.GetColumn(1))))
    ignore(col.Add(new NumericalVariable("topnotch", m.GetColumn(2))))
    ignore(col.Add(new NumericalVariable("gpa", m.GetColumn(3))))
    col

let data2 = ReadGraduateData()

// Now create the regression model. Parameters are the name 
// of the dependent variable, a let array containing 
// the names of the independent variables, and the VariableCollection
// containing all variables.
let model2 = GeneralizedLinearModel(data2, "admit", [| "gre"; "topnotch"; "gpa" |])

// The ModelFamily specifies the distribution of the dependent variable.
// Since we're dealing with binary data, we use a Binomial model:
model2.ModelFamily <- ModelFamily.Binomial

// We use the probit link function.
model2.LinkFunction <- LinkFunction.Probit

// The Compute method performs the actual regression analysis.
model2.Compute()

// The Parameters collection contains information about the regression 
// parameters.
printfn "Variable  Value    Std.Error    z     p-Value"
for parameter in model2.Parameters do
    printfn "%-8s%10.6f%10.6f%8.2f %7.5f" parameter.Name parameter.Value
        parameter.StandardError parameter.Statistic parameter.PValue
printfn ""

// There is also a wealth of information about the analysis available
// through various properties of the GeneralizedLinearModel object:
printfn "Log likelihood:         %.4f" (model2.GetLogLikelihood())
printfn "Kernel log likelihood:  %.4f" (model2.GetKernelLogLikelihood())

// Note that some statistical applications (notably stata) use 
// a different definition of some of these "information criteria":
printfn "\"Information Criteria\""
printfn "Akaike (AIC):           %.3f" (model2.GetAkaikeInformationCriterion())
printfn "Corrected AIC:          %.3f" (model2.GetCorrectedAkaikeInformationCriterion())
printfn "Bayesian (BIC):         %.3f" (model2.GetBayesianInformationCriterion())
printfn "Chi Square: %.3f" (model2.GetChiSquare())
printfn ""

printf "Press any key to exit."
Console.ReadLine() |> ignore