// Illustrates how to use the LogisticRegressionModel class to create logistic regression models in C#.
using System;
using System.Data;
using System.IO;
using Extreme.DataAnalysis;
using Extreme.Mathematics;
using Extreme.Statistics;
using Extreme.Mathematics.LinearAlgebra.IO;
namespace Extreme.Numerics.QuickStart.CSharp
{
/// <summary>
/// Illustrates building logistic regression models using
/// the LogisticRegressionModel class in the
/// Extreme.Statistics namespace of the Extreme
/// Optimization Numerical Libraries for .NET.
/// </summary>
class LogisticRegression
{
    // Column header and row layout shared by every parameter table printed below.
    private const string ParameterTableHeader = "Variable Value Std.Error t-stat p-Value";
    private const string ParameterRowFormat = "{0,-20}{1,10:F5}{2,10:F5}{3,8:F2} {4,7:F4}";

    /// <summary>
    /// Entry point. Fits two binary logistic regression models to the
    /// low birth weight data, compares them with a likelihood ratio test,
    /// then fits a multinomial model to a second data set. All results
    /// are written to the console.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        // Logistic regression can be performed using
        // the LogisticRegressionModel class.
        //
        // This QuickStart sample uses data from a study of factors
        // that determine low birth weight at Baystate Medical Center,
        // from Belsley, Kuh and Welsch. The fields are as follows:
        //   AGE:  Mother's age.
        //   LWT:  Mother's weight.
        //   RACE: 1=white, 2=black, 3=other.
        //   FTV:  Number of physician visits during the 1st trimester.
        //   LOW:  Low birth weight indicator.

        // First, read the data from a fixed-width text file into a data frame.
        var reader = new Statistics.IO.FixedWidthDataFileReader(
            @"..\..\..\..\Data\lowbwt.txt");
        reader.SetColumnBreaks(0, 4, 11, 18, 25, 33, 42, 49, 55, 61, 68);
        var data = reader.ReadDataFrame();
        reader.Close();

        // Now create the regression model. Parameters are the data frame
        // containing all variables, the name of the dependent variable,
        // and a string array containing the names of the independent
        // variables.
        // Categorical variables are automatically expanded into
        // indicator variables if they are marked properly:
        data.MakeCategorical("RACE", Index.Create(new[] { 1, 2, 3 }));
        var model = new LogisticRegressionModel(data, "LOW",
            new string[] { "AGE", "LWT", "RACE", "FTV" });

        // Alternatively, we can use a formula to describe the variables
        // in the model. The dependent variable goes on the left, the
        // independent variables on the right of the ~:
        model = new LogisticRegressionModel(data, "LOW ~ AGE + LWT + RACE + FTV");

        // The Compute method performs the actual regression analysis.
        model.Compute();

        // The Parameters collection contains information about the
        // regression parameters.
        PrintParameters(model);

        // The log-likelihood of the computed solution is also available:
        Console.WriteLine("Log-likelihood: {0:F4}", model.LogLikelihood);

        // We can test the significance by looking at the results
        // of a log-likelihood test, which compares the model to
        // a constant-only model:
        Extreme.Statistics.Tests.SimpleHypothesisTest lrt = model.GetLikelihoodRatioTest();
        Console.WriteLine("Likelihood-ratio test: chi-squared={0:F4}, p={1:F4}", lrt.Statistic, lrt.PValue);
        Console.WriteLine();

        // We can compute a model with fewer parameters:
        var model2 = new LogisticRegressionModel(data, "LOW ~ LWT + RACE");
        model2.Compute();

        // Print the results...
        PrintParameters(model2);

        // ...including the log-likelihood:
        Console.WriteLine("Log-likelihood: {0:F4}", model2.LogLikelihood);

        // We can now compare the original model to this one, once again
        // using the likelihood ratio test:
        lrt = model.GetLikelihoodRatioTest(model2);
        Console.WriteLine("Likelihood-ratio test: chi-squared={0:F4}, p={1:F4}", lrt.Statistic, lrt.PValue);
        Console.WriteLine();

        //
        // Multinomial (polytomous) logistic regression
        //

        // The LogisticRegressionModel class can also be used
        // for logistic regression with more than 2 responses.
        // The following example is from "Applied Linear Statistical
        // Models."

        // The StreamReader returned by File.OpenText owns a file handle,
        // so it is wrapped in a using block to guarantee it is released
        // even if reading or fitting throws.
        using (StreamReader mlogitText = File.OpenText(@"..\..\..\..\Data\mlogit.txt"))
        {
            // Load the data into a matrix
            FixedWidthMatrixReader reader2 = new FixedWidthMatrixReader(
                mlogitText,
                0, new int[] { 5, 10, 15, 20, 25, 32, 37, 42, 47 },
                System.Globalization.NumberStyles.Integer, null);
            var m = reader2.ReadMatrix();

            // Next, convert the columns to variables.
            // For multinomial regression, the response variable must be
            // a CategoricalVariable:
            var duration = m.GetColumn(1).AsCategorical();
            var nutritio = m.GetColumn(5);
            var agecat1 = m.GetColumn(6);
            var agecat3 = m.GetColumn(7);
            var alcohol = m.GetColumn(8);
            var smoking = m.GetColumn(9);
            var dataFrame = DataFrame.FromColumns(
                new[] { duration, nutritio, agecat1, agecat3, alcohol, smoking },
                new[] { "duration", "nutritio", "agecat1", "agecat3", "alcohol", "smoking" });

            // The constructor takes an extra argument of type
            // LogisticRegressionMethod:
            var model3 = new LogisticRegressionModel(duration,
                new Vector<double>[] { nutritio, agecat1, agecat3, alcohol, smoking },
                method: LogisticRegressionMethod.Nominal);

            // When using a formula, we can use '.' as a shortcut
            // for all unused variables in the data frame.
            // Because duration has 3 levels, nominal logistic regression
            // is automatically inferred.
            model3 = new LogisticRegressionModel(dataFrame, "duration ~ .");

            // Everything else is the same:
            model3.Compute();

            // There is a set of parameters for each level of the
            // response variable. The highest level is the reference
            // level and has no associated parameters.
            foreach (Parameter p in model3.Parameters)
            {
                Console.WriteLine(p.ToString());
            }
            Console.WriteLine("Log likelihood: {0:F4}", model3.LogLikelihood);

            // To test the hypothesis that all the slopes are zero,
            // use the GetLikelihoodRatioTest method.
            lrt = model3.GetLikelihoodRatioTest();
            Console.WriteLine("Test that all slopes are zero: chi-squared={0:F4}, p={1:F4}", lrt.Statistic, lrt.PValue);
        }

        // Console.ReadLine blocks until the user presses Enter.
        Console.Write("Press any key to exit.");
        Console.ReadLine();
    }

    /// <summary>
    /// Writes a table of the model's regression parameters to the console:
    /// a header line followed by one row per parameter.
    /// </summary>
    /// <param name="model">A model whose Compute method has already been called.</param>
    private static void PrintParameters(LogisticRegressionModel model)
    {
        Console.WriteLine(ParameterTableHeader);
        foreach (Parameter parameter in model.Parameters)
        {
            // Parameter objects have the following properties:
            Console.WriteLine(ParameterRowFormat,
                // Name, usually the name of the variable:
                parameter.Name,
                // Estimated value of the parameter:
                parameter.Value,
                // Standard error:
                parameter.StandardError,
                // The value of the test statistic for the hypothesis
                // that the parameter is zero.
                parameter.Statistic,
                // Probability corresponding to the test statistic.
                parameter.PValue);
        }
    }
}
}