Data Analysis Mathematics Linear Algebra Statistics
New Version 6.0!

Try it for free with our fully functional 60-day trial version.

QuickStart Samples

# Generalized Linear Models QuickStart Sample (C#)

Illustrates how to use the GeneralizedLinearModel class to compute probit, Poisson and similar regression models in C#.

```csharp
using System;
using System.Data;
using System.IO;

using Extreme.DataAnalysis;
using Extreme.Mathematics;
using Extreme.Mathematics.LinearAlgebra.IO;
using Extreme.Statistics;

namespace Extreme.Numerics.QuickStart.CSharp
{

/// <summary>
/// Illustrates building generalized linear models using
/// the GeneralizedLinearModel class in the
/// Extreme.Statistics namespace of the Extreme
/// Optimization Numerical Libraries for .NET.
/// </summary>
class GeneralizedLinearModels
{
/// <summary>
/// The main entry point for the application.
/// </summary>
static void Main(string[] args)
{
    // Fits two generalized linear models (a Poisson regression and a probit
    // regression) with the GeneralizedLinearModel class and prints the
    // parameter estimates and summary statistics of each to the console.
    // The args parameter is not used.

    //
    // Poisson regression
    //

    // This QuickStart sample uses data about the attendance of 316 students
    // from two urban high schools. The fields are as follows:
    //   daysabs: The number of days the student was absent.
    //   male:    A binary indicator of gender.
    //   math:    The student's standardized math score.
    //   langarts:The student's standardized language arts score.
    //
    // We want to investigate the relationship between these variables.
    //
    // See http://www.ats.ucla.edu/stat/stata/dae/poissonreg.htm

    // First, read the data from a file into a data frame.
    // The ReadAttendanceData method is defined later in this file.
    var data = ReadAttendanceData();

    // Now create the regression model. Arguments are the data frame
    // containing all variables, the name of the dependent variable,
    // and a string array containing the names of the independent variables.
    var model = new GeneralizedLinearModel(data,
        "daysabs", new string[] { "math", "langarts", "male" });

    // Alternatively, the model can be specified with a formula string that
    // names the same variables. This replaces the model created above.
    model = new GeneralizedLinearModel(data,
        "daysabs ~ math + langarts + male");

    // The ModelFamily specifies the distribution of the dependent variable.
    // Since we're dealing with count data, we use a Poisson model:
    model.ModelFamily = ModelFamily.Poisson;

    // The LinkFunction specifies the relationship between the dependent variable
    // and the linear predictor of independent variables. In this case,
    // we use the canonical link function, which is the default,
    // so nothing needs to be set here.

    // The Compute method performs the actual regression analysis.
    model.Compute();

    // The Parameters collection contains information about the regression
    // parameters.
    Console.WriteLine("Variable              Value    Std.Error    z     p-Value");
    foreach (Parameter parameter in model.Parameters)
    {
        // Parameter objects have the following properties:
        Console.WriteLine("{0,-20}{1,10:F6}{2,10:F6}{3,8:F2} {4,7:F5}",
            // Name, usually the name of the variable:
            parameter.Name,
            // Estimated value of the parameter:
            parameter.Value,
            // Standard error:
            parameter.StandardError,
            // The value of the z score for the hypothesis that the parameter
            // is zero.
            parameter.Statistic,
            // Probability corresponding to the z statistic.
            parameter.PValue);
    }
    Console.WriteLine();

    // In addition to these properties, Parameter objects have a GetConfidenceInterval
    // method that returns a confidence interval at a specified confidence level.
    // Notice that individual parameters can be accessed using their numeric index.
    // Parameter 0 is the intercept, if it was included, so the label says
    // "intercept" rather than repeating the "math score" label used below.
    Interval confidenceInterval = model.Parameters[0].GetConfidenceInterval(0.95);
    Console.WriteLine("95% confidence interval for intercept: {0:F4} - {1:F4}",
        confidenceInterval.LowerBound, confidenceInterval.UpperBound);

    // Parameters can also be accessed by name:
    confidenceInterval = model.Parameters.Get("math").GetConfidenceInterval(0.95);
    Console.WriteLine("95% confidence interval for math score: {0:F4} - {1:F4}",
        confidenceInterval.LowerBound, confidenceInterval.UpperBound);
    Console.WriteLine();

    // There is also a wealth of information about the analysis available
    // through various properties and methods of the GeneralizedLinearModel object:
    Console.WriteLine("Log likelihood:         {0:F4}", model.LogLikelihood);
    Console.WriteLine("Kernel log likelihood:  {0:F4}", model.GetKernelLogLikelihood());

    // Note that some statistical applications (notably stata) use
    // a different definition of some of these "information criteria":
    Console.WriteLine("\"Information Criteria\"");
    Console.WriteLine("Akaike (AIC):           {0:F3}", model.GetAkaikeInformationCriterion());
    Console.WriteLine("Corrected AIC:          {0:F3}", model.GetCorrectedAkaikeInformationCriterion());
    Console.WriteLine("Bayesian (BIC):         {0:F3}", model.GetBayesianInformationCriterion());
    Console.WriteLine("Chi Square:             {0:F3}", model.GetChiSquare());
    Console.WriteLine();

    //
    // Probit regression
    //

    // In a second example, we investigate the relationship between whether a student
    // is admitted, and the student's GRE score, grade point average, and whether
    // the student comes from a "top notch" school. The fields are as follows:
    //   admit:    Dependent variable
    //   gre:      The student's GRE score.
    //   topnotch: A binary indicator of the type of school
    //   gpa:      The student's Grade Point Average.
    //
    // The data was generated.
    // See http://www.ats.ucla.edu/stat/stata/dae/probit.htm

    // First, read the data from a file.
    // The ReadGraduateData method is defined later in this file.
    // A separate variable is used because the attendance data read above
    // does not contain the admit, gre, topnotch and gpa variables.
    var graduateData = ReadGraduateData();

    // Now create the regression model. Arguments are the data
    // containing all variables, the name of the dependent variable,
    // and a string array containing the names of the independent variables.
    model = new GeneralizedLinearModel(graduateData,
        "admit", new string[] { "gre", "topnotch", "gpa" });

    // The ModelFamily specifies the distribution of the dependent variable.
    // Since we're dealing with binary data, we use a Binomial model:
    model.ModelFamily = ModelFamily.Binomial;

    // We use the probit link function. It has to be set explicitly;
    // otherwise the default (canonical) link function would be used.
    model.LinkFunction = LinkFunction.Probit;

    // The Compute method performs the actual regression analysis.
    model.Compute();

    // The Parameters collection contains information about the regression
    // parameters. The columns are the same as for the Poisson model above.
    Console.WriteLine("Variable              Value    Std.Error    z     p-Value");
    foreach (Parameter parameter in model.Parameters)
    {
        Console.WriteLine("{0,-20}{1,10:F6}{2,10:F6}{3,8:F2} {4,7:F5}",
            parameter.Name,
            parameter.Value,
            parameter.StandardError,
            parameter.Statistic,
            parameter.PValue);
    }
    Console.WriteLine();

    // There is also a wealth of information about the analysis available
    // through various properties and methods of the GeneralizedLinearModel object:
    Console.WriteLine("Log likelihood:         {0:F4}", model.LogLikelihood);
    Console.WriteLine("Kernel log likelihood:  {0:F4}", model.GetKernelLogLikelihood());

    // Note that some statistical applications (notably stata) use
    // a different definition of some of these "information criteria":
    Console.WriteLine("\"Information Criteria\"");
    Console.WriteLine("Akaike (AIC):           {0:F3}", model.GetAkaikeInformationCriterion());
    Console.WriteLine("Corrected AIC:          {0:F3}", model.GetCorrectedAkaikeInformationCriterion());
    Console.WriteLine("Bayesian (BIC):         {0:F3}", model.GetBayesianInformationCriterion());
    Console.WriteLine("Chi Square:             {0:F3}", model.GetChiSquare());
    Console.WriteLine();

    Console.Write("Press any key to exit.");
    // Actually wait for the key press announced above. Console.ReadKey throws
    // when standard input is redirected, so skip the wait in that case.
    if (!Console.IsInputRedirected)
    {
        Console.ReadKey(true);
    }
}

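/// <summary>
/// Reads the attendance data from a text file into a <see cref="DataFrame{R, C}"/>
/// with <c>long</c> row keys and <c>string</c> column keys.
/// </summary>
/// <returns>A <see cref="DataFrame{R, C}"/> whose columns are labeled
/// id, school, male, math, langarts, daysatt and daysabs.</returns>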
public static DataFrame<long, string> ReadAttendanceData()
{
    // NOTE(review): `reader` and `m` are not declared inside this method.
    // Presumably they are declared elsewhere in the class, or their
    // declarations were lost from this listing -- confirm before relying on it.

    // Fields are separated by commas; a row ends at a carriage return or a line feed.
    char[] columnSeparators = { ',' };
    char[] rowSeparators = { '\r', '\n' };
    reader.SetColumnDelimiters(columnSeparators);
    reader.SetRowDelimiters(rowSeparators);

    // Labels for the columns of the resulting data frame, in order.
    string[] columnNames =
    {
        "id", "school", "male", "math", "langarts", "daysatt", "daysabs"
    };
    var columnLabels = Index.Create(columnNames);

    // Rows get the default index sized to the number of rows in m.
    var rowLabels = Index.Default(m.RowCount);
    return m.ToDataFrame(rowLabels, columnLabels);
}

/// <summary>
/// Reads the data from a text file into a <see cref="VariableCollection"/>.
/// </summary>
/// <returns>A <see cref="VariableCollection"/></returns>