New Version 6.0!

Try it for free with our fully functional 60-day trial version.

Download now!

QuickStart Samples

Generalized Linear Models QuickStart Sample (Visual Basic)

Illustrates how to use the GeneralizedLinearModel class to compute probit, Poisson and similar regression models in Visual Basic.

C# code F# code Back to QuickStart Samples

Option Infer On

Imports System
Imports System.IO

Imports Extreme.DataAnalysis
Imports Extreme.Mathematics
Imports Extreme.Mathematics.LinearAlgebra.IO
Imports Extreme.Statistics

Namespace Extreme.Numerics.QuickStart.VB

    ' Illustrates building generalized linear models using 
    ' the GeneralizedLinearModel class in the 
    ' Extreme.Statistics namespace of the Extreme
    ' Optimization Numerical Libraries for .NET.
    Module GoodnessOfFitTests

        Sub Main()
            ' Entry point of the sample. Fits two generalized linear models
            ' (a Poisson regression and a probit regression) with the
            ' GeneralizedLinearModel class and writes the parameter estimates
            ' and several fit statistics to the console.

            '
            ' Poisson regression
            '

            ' This QuickStart sample uses data about the attendance of 316 students
            ' from two urban high schools. The fields are as follows:
            '   daysabs: The number of days the student was absent.
            '   male:    A binary indicator of gender.
            '   math:    The student's standardized math score.
            '   langarts:The student's standardized language arts score.
            '
            ' We want to investigate the relationship between these variables.
            ' 
            ' See http://www.ats.ucla.edu/stat/stata/dae/poissonreg.htm

            ' First, read the data from a file into a data frame.
            ' The ReadAttendanceData method is defined later in this file.
            Dim data = ReadAttendanceData()

            ' Now create the regression model. Parameters are the data frame
            ' containing all variables, the name of the dependent variable,
            ' and a string array containing the names of the independent
            ' variables.
            Dim model As New GeneralizedLinearModel(data, "daysabs", New String() {"math", "langarts", "male"})

            ' Alternatively, we can use a formula to describe the variables
            ' in the model. The dependent variable goes on the left, the
            ' independent variables on the right of the ~
            ' (This replaces the model created above; both forms are shown
            ' for illustration only.)
            model = New GeneralizedLinearModel(data, "daysabs ~ math + langarts + male")


            ' The ModelFamily specifies the distribution of the dependent variable.
            ' Since we're dealing with count data, we use a Poisson model:
            model.ModelFamily = ModelFamily.Poisson

            ' The LinkFunction specifies the relationship between the dependent variable
            ' and the linear predictor of independent variables. In this case,
            ' we use the canonical link function, which is the default.
            model.LinkFunction = ModelFamily.Poisson.CanonicalLinkFunction

            ' The Compute method performs the actual regression analysis.
            model.Compute()

            ' The Parameters collection contains information about the regression 
            ' parameters.
            Console.WriteLine("Variable              Value    Std.Error    z     p-Value")
            For Each param As Parameter In model.Parameters
                ' Parameter objects have the following properties:
                ' Name, usually the name of the variable:
                ' Estimated value of the param:
                ' Standard error:
                ' The value of the z score for the hypothesis that the param is zero.
                Console.WriteLine("{0,-20}{1,10:F6}{2,10:F6}{3,8:F2} {4,7:F5}", _
                    param.Name, param.Value, param.StandardError, param.Statistic, _
                    param.PValue)
            Next
            Console.WriteLine()

            ' In addition to these properties, Parameter objects have a GetConfidenceInterval
            ' method that returns a confidence interval at a specified confidence level.
            ' Notice that individual parameters can be accessed using their numeric index.
            ' Parameter 0 is the intercept, if it was included. The label is taken
            ' from the parameter's own Name so that the output always describes
            ' the parameter that was actually queried.
            Dim firstParameter As Parameter = model.Parameters(0)
            Dim confidenceInterval As Interval = firstParameter.GetConfidenceInterval(0.95)
            Console.WriteLine("95% confidence interval for {0}: {1:F4} - {2:F4}", _
                firstParameter.Name, confidenceInterval.LowerBound, confidenceInterval.UpperBound)

            ' Parameters can also be accessed by name:
            confidenceInterval = model.Parameters.Get("math").GetConfidenceInterval(0.95)
            Console.WriteLine("95% confidence interval for math score: {0:F4} - {1:F4}", _
                confidenceInterval.LowerBound, confidenceInterval.UpperBound)
            Console.WriteLine()

            ' There is also a wealth of information about the analysis available
            ' through various properties of the GeneralizedLinearModel object:
            Console.WriteLine("Log likelihood:         {0:F4}", model.LogLikelihood)
            Console.WriteLine("Kernel log likelihood:  {0:F4}", model.GetKernelLogLikelihood())

            ' Note that some statistical applications (notably stata) use 
            ' a different definition of some of these "information criteria":
            Console.WriteLine("""Information Criteria""")
            Console.WriteLine("Akaike (AIC):           {0:F3}", model.GetAkaikeInformationCriterion())
            Console.WriteLine("Corrected AIC:          {0:F3}", model.GetCorrectedAkaikeInformationCriterion())
            Console.WriteLine("Bayesian (BIC):         {0:F3}", model.GetBayesianInformationCriterion())
            Console.WriteLine("Chi Square:             {0:F3}", model.GetChiSquare())
            Console.WriteLine()

            '
            ' Probit regression
            ' 

            ' In a second example, we investigate the relationship between the
            ' admit outcome and the student's GRE score, grade point average,
            ' and whether the student comes from a "top notch" school.
            ' The fields are as follows:
            '   admit:    Dependent variable
            '   gre:      The student's GRE score.
            '   topnotch: A binary indicator of the type of school
            '   gpa:      The student's Grade Point Average.
            ' 
            ' The data was generated.
            ' See http://www.ats.ucla.edu/stat/stata/dae/probit.htm

            ' First, read the data from a file into a data frame.
            ' The ReadGraduateData method is defined later in this file.
            data = ReadGraduateData()

            ' Now create the regression model. Parameters are the data frame
            ' containing all variables, the name of the dependent variable,
            ' and a string array containing the names of the independent
            ' variables.
            model = New GeneralizedLinearModel(data, "admit", New String() {"gre", "topnotch", "gpa"})

            ' The ModelFamily specifies the distribution of the dependent variable.
            ' Since we're dealing with binary data, we use a Binomial model:
            model.ModelFamily = ModelFamily.Binomial

            ' We use the probit link function.
            model.LinkFunction = LinkFunction.Probit

            ' The Compute method performs the actual regression analysis.
            model.Compute()

            ' The Parameters collection contains information about the regression 
            ' parameters.
            Console.WriteLine("Variable              Value    Std.Error    z     p-Value")
            For Each param As Parameter In model.Parameters
                Console.WriteLine("{0,-20}{1,10:F6}{2,10:F6}{3,8:F2} {4,7:F5}", _
                    param.Name, param.Value, param.StandardError, param.Statistic, _
                    param.PValue)
            Next
            Console.WriteLine()

            ' There is also a wealth of information about the analysis available
            ' through various properties of the GeneralizedLinearModel object:
            Console.WriteLine("Log likelihood:         {0:F4}", model.LogLikelihood)
            Console.WriteLine("Kernel log likelihood:  {0:F4}", model.GetKernelLogLikelihood())

            ' Note that some statistical applications (notably stata) use 
            ' a different definition of some of these "information criteria":
            Console.WriteLine("""Information Criteria""")
            Console.WriteLine("Akaike (AIC):           {0:F3}", model.GetAkaikeInformationCriterion())
            Console.WriteLine("Corrected AIC:          {0:F3}", model.GetCorrectedAkaikeInformationCriterion())
            Console.WriteLine("Bayesian (BIC):         {0:F3}", model.GetBayesianInformationCriterion())
            Console.WriteLine("Chi Square:             {0:F3}", model.GetChiSquare())
            Console.WriteLine()

            ' Keep the console window open until the user responds.
            Console.Write("Press any key to exit.")
            Console.ReadLine()
        End Sub

        ''' <summary>
        ''' Reads the attendance data from a text file into a <see cref="IDataFrame"/>.
        ''' </summary>
        ''' <returns>A <see cref="IDataFrame"/></returns>
        Function ReadAttendanceData() As IDataFrame
            ' Reads the comma-delimited file PoissonReg.csv into a matrix,
            ' labels its seven columns, and returns it as an IDataFrame.
            Dim reader As New DelimitedTextMatrixReader("..\..\..\data\PoissonReg.csv")
            ' Start reading at row index 1 (presumably skipping a header row - confirm
            ' against the data file).
            reader.StartRow = 1
            reader.SetColumnDelimiters(New Char() {","c})
            ' Rows may end in carriage return (13) and/or line feed (10).
            ' The previous code listed the line feed character twice.
            reader.SetRowDelimiters(New Char() {Microsoft.VisualBasic.Chr(13), Microsoft.VisualBasic.Chr(10)})
            reader.MergeConsecutiveDelimiters = False
            Dim m = reader.ReadMatrix()
            ' Name the columns so the model can refer to variables by name.
            m.ColumnIndex = Index.Create(New String() _
                {"id", "school", "male", "math", "langarts", "daysatt", "daysabs"})
            Return m
        End Function

        ''' <summary>
        ''' Reads the graduate data from a text file into a <see cref="IDataFrame"/>.
        ''' </summary>
        ''' <returns>A <see cref="IDataFrame"/></returns>
        Function ReadGraduateData() As IDataFrame
            ' Loads the fixed-width file probit.dat into a matrix, labels its
            ' four columns, and hands the result back as an IDataFrame.
            Dim columnNames As String() = {"admit", "gre", "topnotch", "gpa"}

            Dim source As New FixedWidthMatrixReader("..\..\..\data\probit.dat")
            With source
                ' Begin at row index 1 and split each line at the given
                ' character positions.
                .StartRow = 1
                .SetColumnBreaks(9, 18, 27)
            End With

            Dim table = source.ReadMatrix()
            ' Attach the variable names so columns can be addressed by name.
            table.ColumnIndex = Index.Create(columnNames)
            Return table
        End Function
    End Module
End Namespace