New Version 6.0!

Try it for free with our fully functional 60-day trial version.

Download now!

QuickStart Samples

Generalized Linear Models QuickStart Sample (C#)

Illustrates how to use the GeneralizedLinearModel class to compute probit, Poisson and similar regression models in C#.

Visual Basic code F# code Back to QuickStart Samples

using System;
using System.Data;
using System.IO;

using Extreme.DataAnalysis;
using Extreme.Mathematics;
using Extreme.Mathematics.LinearAlgebra.IO;
using Extreme.Statistics;

namespace Extreme.Numerics.QuickStart.CSharp
{

    /// <summary>
    /// Illustrates building generalized linear models using 
    /// the GeneralizedLinearModel class in the 
    /// Extreme.Statistics namespace of the Extreme
    /// Optimization Numerical Libraries for .NET.
    /// </summary>
    class GeneralizedLinearModels
    {
        /// <summary>
        /// The main entry point for the application. Fits a Poisson regression
        /// to the attendance data and a probit regression to the graduate data,
        /// and writes the results of both analyses to the console.
        /// </summary>
        /// <param name="args">Command line arguments. Not used.</param>
        [STAThread]
        static void Main(string[] args)
        {
            // Generalized linear models can be computed using 
            // the GeneralizedLinearModel class.

            //
            // Poisson regression
            //

            // This QuickStart sample uses data about the attendance of 316 students
            // from two urban high schools. The fields are as follows:
            //   daysabs: The number of days the student was absent.
            //   male:    A binary indicator of gender.
            //   math:    The student's standardized math score.
            //   langarts:The student's standardized language arts score.
            //
            // We want to investigate the relationship between these variables.
            // 
            // See http://www.ats.ucla.edu/stat/stata/dae/poissonreg.htm

            // First, read the data from a file into a data frame.
            // The ReadAttendanceData method is defined later in this file.
            var data = ReadAttendanceData();

            // Now create the regression model. Arguments are the data frame
            // containing all variables, the name of the dependent variable,
            // and a string array containing the names of the independent variables.
            var model = new GeneralizedLinearModel(data,
                "daysabs", new string[] { "math", "langarts", "male" });

            // The constructor also accepts a single formula-style string that names
            // the dependent and independent variables. This instance replaces the one
            // created above; the first construction is kept only to show both forms.
            model = new GeneralizedLinearModel(data,
                "daysabs ~ math + langarts + male");

            // The ModelFamily specifies the distribution of the dependent variable.
            // Since we're dealing with count data, we use a Poisson model:
            model.ModelFamily = ModelFamily.Poisson;

            // The LinkFunction specifies the relationship between the dependent variable
            // and the linear predictor of independent variables. In this case,
            // we use the canonical link function, which is the default.
            model.LinkFunction = ModelFamily.Poisson.CanonicalLinkFunction;

            // The Compute method performs the actual regression analysis.
            model.Compute();

            // The Parameters collection contains information about the regression 
            // parameters.
            WriteParameterTable(model);

            // In addition to these properties, Parameter objects have a GetConfidenceInterval
            // method that returns a confidence interval at a specified confidence level.
            // Notice that individual parameters can be accessed using their numeric index.
            // Parameter 0 is the intercept, if it was included.
            Interval confidenceInterval = model.Parameters[0].GetConfidenceInterval(0.95);
            Console.WriteLine("95% confidence interval for intercept: {0:F4} - {1:F4}",
                confidenceInterval.LowerBound, confidenceInterval.UpperBound);

            // Parameters can also be accessed by name:
            confidenceInterval = model.Parameters.Get("math").GetConfidenceInterval(0.95);
            Console.WriteLine("95% confidence interval for math score: {0:F4} - {1:F4}",
                confidenceInterval.LowerBound, confidenceInterval.UpperBound);
            Console.WriteLine();

            // There is also a wealth of information about the analysis available
            // through various members of the GeneralizedLinearModel object:
            WriteFitSummary(model);

            //
            // Probit regression
            // 

            // In a second example, we investigate the relationship between the binary
            // outcome admit and the student's GRE score, grade point average, and
            // whether the student comes from a "top notch" school.
            // The fields are as follows:
            //   admit:    Dependent variable
            //   gre:      The student's GRE score.
            //   topnotch: A binary indicator of the type of school
            //   gpa:      The student's Grade Point Average.
            // 
            // The data was generated.
            // See http://www.ats.ucla.edu/stat/stata/dae/probit.htm

            // First, read the data from a file into a data frame.
            // The ReadGraduateData method is defined later in this file.
            data = ReadGraduateData();

            // Now create the regression model. Arguments are the data frame
            // containing all variables, the name of the dependent variable,
            // and a string array containing the names of the independent variables.
            model = new GeneralizedLinearModel(data,
                "admit", new string[] { "gre", "topnotch", "gpa" });

            // The ModelFamily specifies the distribution of the dependent variable.
            // Since we're dealing with binary data, we use a Binomial model:
            model.ModelFamily = ModelFamily.Binomial;

            // We use the probit link function.
            model.LinkFunction = LinkFunction.Probit;

            // The Compute method performs the actual regression analysis.
            model.Compute();

            // Report the parameters and the fit statistics in the same layout
            // as for the Poisson model.
            WriteParameterTable(model);
            WriteFitSummary(model);

            // Console.ReadLine only returns once Enter is pressed,
            // so the prompt asks for Enter rather than "any key".
            Console.Write("Press Enter to exit.");
            Console.ReadLine();
        }

        /// <summary>
        /// Writes one line per regression parameter of the model to the console,
        /// preceded by a header line and followed by a blank line.
        /// </summary>
        /// <param name="model">The model whose Parameters collection is listed.
        /// Main calls this only after calling Compute on the model.</param>
        private static void WriteParameterTable(GeneralizedLinearModel model)
        {
            Console.WriteLine("Variable              Value    Std.Error    z     p-Value");
            foreach (Parameter parameter in model.Parameters)
            {
                // Parameter objects have the following properties:
                Console.WriteLine("{0,-20}{1,10:F6}{2,10:F6}{3,8:F2} {4,7:F5}",
                    // Name, usually the name of the variable:
                    parameter.Name,
                    // Estimated value of the parameter:
                    parameter.Value,
                    // Standard error:
                    parameter.StandardError,
                    // The value of the z score for the hypothesis that the parameter
                    // is zero.
                    parameter.Statistic,
                    // Probability corresponding to the z statistic.
                    parameter.PValue);
            }
            Console.WriteLine();
        }

        /// <summary>
        /// Writes the log likelihood values and the information criteria
        /// of the model to the console, followed by a blank line.
        /// </summary>
        /// <param name="model">The model whose statistics are reported.
        /// Main calls this only after calling Compute on the model.</param>
        private static void WriteFitSummary(GeneralizedLinearModel model)
        {
            Console.WriteLine("Log likelihood:         {0:F4}", model.LogLikelihood);
            Console.WriteLine("Kernel log likelihood:  {0:F4}", model.GetKernelLogLikelihood());

            // Note that some statistical applications (notably stata) use 
            // a different definition of some of these "information criteria":
            Console.WriteLine("\"Information Criteria\"");
            Console.WriteLine("Akaike (AIC):           {0:F3}", model.GetAkaikeInformationCriterion());
            Console.WriteLine("Corrected AIC:          {0:F3}", model.GetCorrectedAkaikeInformationCriterion());
            Console.WriteLine("Bayesian (BIC):         {0:F3}", model.GetBayesianInformationCriterion());
            Console.WriteLine("Chi Square:             {0:F3}", model.GetChiSquare());
            Console.WriteLine();
        }

        /// <summary>
        /// Reads the attendance data from the comma-delimited text file
        /// PoissonReg.csv into a data frame.
        /// </summary>
        /// <returns>A <c>DataFrame&lt;long, string&gt;</c> built from the matrix read
        /// from the file, with a default row index and the columns named
        /// id, school, male, math, langarts, daysatt and daysabs.</returns>
        public static DataFrame<long, string> ReadAttendanceData()
        {
            // The path is relative; presumably it is resolved against the working
            // directory of the sample's build output -- verify when relocating the sample.
            DelimitedTextMatrixReader reader = new DelimitedTextMatrixReader(@"..\..\..\..\data\PoissonReg.csv");
            // Start reading at row 1 rather than row 0 (presumably row 0 holds
            // the column headers -- confirm against the data file).
            reader.StartRow = 1;
            reader.SetColumnDelimiters(new char[] { ',' });
            reader.SetRowDelimiters(new char[] { '\r', '\n' });
            reader.MergeConsecutiveDelimiters = false;
            var m = reader.ReadMatrix();

            // The matrix carries no names, so the column names are supplied here.
            var columnIndex = Index.Create(new string[] { 
                "id", "school", "male", "math", "langarts", "daysatt", "daysabs" });
            return m.ToDataFrame(Index.Default(m.RowCount), columnIndex);
        }

        /// <summary>
        /// Reads the graduate data from the fixed-width text file
        /// probit.dat into a data frame.
        /// </summary>
        /// <returns>A <c>DataFrame&lt;long, string&gt;</c> built from the matrix read
        /// from the file, with a default row index and the columns named
        /// admit, gre, topnotch and gpa.</returns>
        public static DataFrame<long, string> ReadGraduateData()
        {
            // The path is relative; presumably it is resolved against the working
            // directory of the sample's build output -- verify when relocating the sample.
            FixedWidthMatrixReader reader = new FixedWidthMatrixReader(@"..\..\..\..\data\probit.dat");
            // Start reading at row 1 rather than row 0 (presumably row 0 holds
            // the column headers -- confirm against the data file).
            reader.StartRow = 1;
            // Three column breaks, matching the four columns named below.
            reader.SetColumnBreaks(9, 18, 27);
            var m = reader.ReadMatrix();

            // The matrix carries no names, so the column names are supplied here.
            var columnIndex = Index.Create(new string[] { "admit", "gre", "topnotch", "gpa" });
            return m.ToDataFrame(Index.Default(m.RowCount), columnIndex);
        }
    }
}