New Version 6.0!

Try it for free with our fully functional 60-day trial version.

Download now!

QuickStart Samples

Logistic Regression QuickStart Sample (C#)

Illustrates how to use the LogisticRegressionModel class to create logistic regression models in C#.

Visual Basic code F# code IronPython code Back to QuickStart Samples

using System;
using System.Data;
using System.IO;

using Extreme.DataAnalysis;
using Extreme.Mathematics;
using Extreme.Statistics;
using Extreme.Mathematics.LinearAlgebra.IO;

namespace Extreme.Numerics.QuickStart.CSharp

    /// <summary>
    /// Illustrates building logistic regression models using 
    /// the LogisticRegressionModel class in the 
    /// Extreme.Statistics namespace of the Extreme
    /// Optimization Numerical Libraries for .NET.
    /// </summary>
    /// <remarks>
    /// NOTE(review): this listing contains no brace lines and several
    /// statements look truncated (see the inline review notes). Compare it
    /// with the original sample before trying to compile it.
    /// </remarks>
    class LogisticRegression
        /// <summary>
        /// Builds two binary logistic regression models on the low birth
        /// weight data, prints their parameters and log-likelihoods,
        /// compares them with likelihood-ratio tests, and then builds a
        /// multinomial (nominal) logistic regression model on a second
        /// data set.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by the code shown here.</param>
        static void Main(string[] args)
            // Logistic regression can be performed using 
            // the LogisticRegressionModel class.
            // This QuickStart sample uses data from a study of factors
            // that determine low birth weight at Baystate Medical Center,
            // from Belsley, Kuh and Welsch.
            // NOTE(review): this data set is commonly attributed to Hosmer and
            // Lemeshow (Applied Logistic Regression) - confirm the citation.
            // The fields are as follows:
            //   AGE:  Mother's age.
            //   LWT:  Mother's weight.
            //   RACE: 1=white, 2=black, 3=other.
            //   FTV:  Number of physician visits during the 1st trimester.
            //   LOW:  Low birth weight indicator.

            // First, read the data from a fixed-width text file into a data frame.
            var reader = new Statistics.IO.FixedWidthDataFileReader(
            // NOTE(review): the constructor call above is never closed in this
            // listing; its argument (presumably the path of the data file) is
            // missing and must be restored.
            reader.SetColumnBreaks(0, 4, 11, 18, 25, 33, 42, 49, 55, 61, 68);
            var data = reader.ReadDataFrame();

            // Now create the regression model. The constructor arguments are
            // the data frame containing all variables, the name of the
            // dependent variable, and a string array containing the names
            // of the independent variables.

            // Categorical variables are automatically expanded into
            // indicator variables if they are marked properly.
            // Here RACE is marked as categorical with levels 1, 2 and 3:
            data.MakeCategorical("RACE", Index.Create(new[] { 1, 2, 3 }));

            var model = new LogisticRegressionModel(data, "LOW", 
                new string[] { "AGE", "LWT", "RACE", "FTV" });

            // Alternatively, we can use a formula to describe the variables
            // in the model. The dependent variable goes on the left, the
            // independent variables on the right of the ~.
            // This replaces the model created above with an equivalent one:
            model = new LogisticRegressionModel(data, "LOW ~ AGE + LWT + RACE + FTV");

            // The Compute method performs the actual regression analysis.
            // NOTE(review): no call to Compute appears in this listing for
            // model, model2 or model3 - confirm whether the calls were lost.

            // The Parameters collection contains information about the regression 
            // parameters.
            Console.WriteLine("Variable              Value    Std.Error  t-stat  p-Value");
            foreach (Parameter parameter in model.Parameters)
                // Parameter objects have the following properties:
                Console.WriteLine("{0,-20}{1,10:F5}{2,10:F5}{3,8:F2} {4,7:F4}",
                    // NOTE(review): the arguments that the comments below
                    // describe are missing from this listing and the call is
                    // never closed. The matching call for model2 further down
                    // passes parameter.Name, parameter.Value,
                    // parameter.StandardError, parameter.Statistic and
                    // parameter.PValue, in that order.
                    // Name, usually the name of the variable:
                    // Estimated value of the parameter:
                    // Standard error:
                    // The value of the t statistic for the hypothesis that the parameter
                    // is zero.
                    // Probability corresponding to the t statistic.

            // The log-likelihood of the computed solution is also available:
            Console.WriteLine("Log-likelihood: {0:F4}", model.LogLikelihood);

            // We can test the significance by looking at the results
            // of a likelihood-ratio test, which compares the model to
            // a constant-only model:
            Extreme.Statistics.Tests.SimpleHypothesisTest lrt = model.GetLikelihoodRatioTest();
            Console.WriteLine("Likelihood-ratio test: chi-squared={0:F4}, p={1:F4}", lrt.Statistic, lrt.PValue);

            // We can compute a model with fewer parameters
            // (AGE and FTV are left out):
            var model2 = new LogisticRegressionModel(data, "LOW ~ LWT + RACE");

            // Print the results...
            Console.WriteLine("Variable              Value    Std.Error  t-stat  p-Value");
            foreach (Parameter parameter in model2.Parameters)
                Console.WriteLine("{0,-20}{1,10:F5}{2,10:F5}{3,8:F2} {4,7:F4}",
                    parameter.Name, parameter.Value, parameter.StandardError, parameter.Statistic, parameter.PValue);
            // ...including the log-likelihood:
            Console.WriteLine("Log-likelihood: {0:F4}", model2.LogLikelihood);

            // We can now compare the original model to this one, once again
            // using the likelihood ratio test:
            lrt = model.GetLikelihoodRatioTest(model2);
            Console.WriteLine("Likelihood-ratio test: chi-squared={0:F4}, p={1:F4}", lrt.Statistic, lrt.PValue);

            // Multinomial (polytomous) logistic regression

            // The LogisticRegressionModel class can also be used
            // for logistic regression with more than 2 responses.
            // The following example is from "Applied Linear Statistical
            // Models."

            // Load the data into a matrix
            // NOTE(review): no file or stream is passed in the visible
            // constructor call - confirm whether a leading argument naming
            // the data source was lost from this listing.
            FixedWidthMatrixReader reader2 = new FixedWidthMatrixReader(
                0, new int[] { 5, 10, 15, 20, 25, 32, 37, 42, 47 }, 
                System.Globalization.NumberStyles.Integer, null);
            var m = reader2.ReadMatrix();

            // Next, convert the columns to variables.

            // For multinomial regression, the response variable must be
            // a CategoricalVariable:
            var duration = m.GetColumn(1).AsCategorical();
            var nutritio = m.GetColumn(5);
            var agecat1 = m.GetColumn(6);
            var agecat3 = m.GetColumn(7);
            var alcohol = m.GetColumn(8);
            var smoking = m.GetColumn(9);

            // Collect the response and the predictors in a data frame, with
            // column names matching the variable names, so that the
            // formula-based constructor can be used further down.
            var dataFrame = DataFrame.FromColumns(
                new[] { duration, nutritio, agecat1, agecat3, alcohol, smoking },
                new[] { "duration", "nutritio", "agecat1", "agecat3", "alcohol", "smoking" });

            // The constructor takes an extra argument of type
            // LogisticRegressionMethod, passed here by name:
            var model3 = new LogisticRegressionModel(duration,
                new Vector<double>[] { nutritio, agecat1, agecat3, alcohol, smoking },
                method: LogisticRegressionMethod.Nominal);

            // When using a formula, we can use '.' as a shortcut 
            // for all unused variables in the data frame.
            // Because duration has 3 levels, nominal logistic regression
            // is automatically inferred.
            model3 = new LogisticRegressionModel(dataFrame, "duration ~ .");

            // Everything else is the same:

            // There is a set of parameters for each level of the
            // response variable. The highest level is the reference 
            // level and has no associated parameters.
            // NOTE(review): the body of the loop below and its closing brace
            // are missing from this listing.
            foreach (Parameter p in model3.Parameters) {

            Console.WriteLine("Log likelihood: {0:F4}", model3.LogLikelihood);

            // To test the hypothesis that all the slopes are zero,
            // use the GetLikelihoodRatioTest method.
            lrt = model3.GetLikelihoodRatioTest();
            Console.WriteLine("Test that all slopes are zero: chi-squared={0:F4}, p={1:F4}", lrt.Statistic, lrt.PValue);

            Console.Write("Press any key to exit.");