New Version 6.0!

Try it for free with our fully functional 60-day trial version.

Download now!

QuickStart Samples

Logistic Regression QuickStart Sample (Visual Basic)

Illustrates how to use the LogisticRegressionModel class to create logistic regression models in Visual Basic.

C# code F# code IronPython code Back to QuickStart Samples

Option Infer On

Imports System.IO

Imports Extreme.DataAnalysis
Imports Extreme.Mathematics
Imports Extreme.Mathematics.LinearAlgebra.IO
Imports Extreme.Statistics
Imports Extreme.Statistics.Tests

Namespace Extreme.Numerics.QuickStart.VB
    ' Illustrates building logistic regression models using 
    ' the LogisticRegressionModel class in the 
    ' Extreme.Statistics namespace of the Extreme
    ' Optimization Numerical Libraries for .NET.
    Module LogisticRegression

        ' Column headings and row format shared by every parameter table
        ' printed by this sample.
        Private Const ParameterHeader As String =
            "Variable              Value    Std.Error  t-stat  p-Value"
        Private Const ParameterFormat As String =
            "{0,-20}{1,10:F5}{2,10:F5}{3,8:F2} {4,7:F4}"

        ' Entry point of the sample. Fits a binary logistic regression model
        ' to the low birth weight data, compares it with a reduced model
        ' using a likelihood ratio test, and then fits a multinomial model
        ' to a second data set. All results are written to the console.
        Sub Main()
            ' Logistic regression can be performed using 
            ' the LogisticRegressionModel class.
            ' This QuickStart sample uses data from a study of factors
            ' that determine low birth weight at Baystate Medical Center,
            ' from Belsley, Kuh and Welsch. The fields are as follows:
            '   AGE:  Mother's age.
            '   LWT:  Mother's weight.
            '   RACE: 1=white, 2=black, 3=other.
            '   FTV:  Number of physician visits during the 1st trimester.
            '   LOW:  Low birth weight indicator.

            ' First, read the data from a file into an ADO.NET DataTable. 
            ' For the sake of clarity, we put this code in its own method.
            ' ReadData returns Nothing when the data file cannot be found;
            ' it has already reported the problem, so we simply stop.
            Dim table As DataTable = ReadData()
            If table Is Nothing Then Exit Sub

            Dim data = DataFrame.FromDataTable(table)

            ' We need indicator variables for the race. All we need to do
            ' is mark the variable as categorical:
            data.MakeCategorical("RACE", Index.Create({1, 2, 3}))

            ' Now create the regression model. Parameters are the data frame
            ' containing all variables, the name of the dependent variable,
            ' and a string array containing the names of the independent
            ' variables.

            ' Note that RACE, which is a categorical variable, is automatically
            ' expanded into indicator variables.
            Dim model As LogisticRegressionModel = New LogisticRegressionModel(data, "LOW",
                New String() {"AGE", "LWT", "RACE", "FTV"})

            ' Alternatively, we can use a formula to describe the variables
            ' in the model. The dependent variable goes on the left, the
            ' independent variables on the right of the ~
            model = New LogisticRegressionModel(data, "LOW ~ AGE + LWT + RACE + FTV")

            ' The Compute method performs the actual regression analysis.
            ' NOTE(review): this call was missing from the listing and has been
            ' restored from the comment above - confirm the method name
            ' against the library version in use.
            model.Compute()

            ' The Parameters collection contains information about the regression 
            ' parameters.
            PrintParameters(model)

            ' The log-likelihood of the computed solution is also available:
            Console.WriteLine("Log-likelihood: {0:F4}", model.LogLikelihood)

            ' We can test the significance by looking at the results
            ' of a log-likelihood test, which compares the model to
            ' a constant-only model:
            Dim lrt As SimpleHypothesisTest = model.GetLikelihoodRatioTest()
            Console.WriteLine("Likelihood-ratio test: chi-squared={0:F4}, p={1:F4}", lrt.Statistic, lrt.PValue)

            ' We can compute a model with fewer parameters:
            Dim model2 As LogisticRegressionModel = New LogisticRegressionModel(data, "LOW",
                New String() {"LWT", "RACE"})
            ' Computed the same way as the first model before its results are read.
            model2.Compute()

            ' Print the results...
            PrintParameters(model2)
            ' ...including the log-likelihood:
            Console.WriteLine("Log-likelihood: {0:F4}", model2.LogLikelihood)

            ' We can now compare the original model to this one, once again
            ' using the likelihood ratio test:
            lrt = model.GetLikelihoodRatioTest(model2)
            Console.WriteLine("Likelihood-ratio test: chi-squared={0:F4}, p={1:F4}", lrt.Statistic, lrt.PValue)

            ' Multinomial (polytomous) logistic regression

            ' The LogisticRegressionModel class can also be used
            ' for logistic regression with more than 2 responses.
            ' The following example is from "Applied Linear Statistical
            ' Models."

            ' Load the data into a matrix
            ' NOTE(review): no input source (file or TextReader) is visible in
            ' this constructor call; an argument naming the data file may have
            ' been lost from the listing - confirm against the
            ' FixedWidthMatrixReader constructor overloads.
            Dim reader As FixedWidthMatrixReader = New FixedWidthMatrixReader(
                0, New Integer() {5, 10, 15, 20, 25, 32, 37, 42, 47},
                System.Globalization.NumberStyles.Integer, Nothing)
            Dim m = reader.ReadMatrix()

            ' Next, convert the columns to variables.

            ' For multinomial regression, the response variable must be
            ' a CategoricalVariable:
            Dim duration = m.GetColumn(1).AsCategorical()
            Dim nutritio = m.GetColumn(5).WithName("nutritio")
            Dim agecat1 = m.GetColumn(6).WithName("agecat1")
            Dim agecat3 = m.GetColumn(7).WithName("agecat3")
            Dim alcohol = m.GetColumn(8).WithName("alcohol")
            Dim smoking = m.GetColumn(9).WithName("smoking")

            ' The constructor takes an extra argument of type
            ' LogisticRegressionMethod:
            ' NOTE(review): the argument itself was missing from the listing;
            ' Nominal is restored from the "nominal logistic regression"
            ' remark below - confirm the enumeration member name.
            Dim model3 As New LogisticRegressionModel(duration,
                {nutritio, agecat1, agecat3, alcohol, smoking},
                LogisticRegressionMethod.Nominal)

            ' When using a formula, we can use '.' as a shortcut 
            ' for all unused variables in the data frame.
            ' Because duration has 3 levels, nominal logistic regression
            ' is automatically inferred.
            Dim data3 = DataFrame.FromColumns(
                {duration, nutritio, agecat1, agecat3, alcohol, smoking},
                {"duration", "nutritio", "agecat1", "agecat3", "alcohol", "smoking"})
            model3 = New LogisticRegressionModel(data3, "duration ~ .")

            ' Everything else is the same:
            model3.Compute()

            ' There is a set of parameters for each level of the
            ' response variable. The highest level is the reference 
            ' level and has no associated parameters.
            For Each p As Parameter In model3.Parameters
                Console.WriteLine(p.ToString())
            Next

            Console.WriteLine("Log likelihood:  {0:F4}", model3.LogLikelihood)

            ' To test the hypothesis that all the slopes are zero,
            ' use the GetLikelihoodRatioTest method.
            lrt = model3.GetLikelihoodRatioTest()
            Console.WriteLine("Test that all slopes are zero: chi-squared={0:F4}, p={1:F4}", lrt.Statistic, lrt.PValue)

            Console.WriteLine("Press Enter key to continue.")
            ' Actually wait for the key press announced above.
            Console.ReadLine()
        End Sub

        ' Writes a table with one line per regression parameter of the
        ' supplied model to the console.
        Private Sub PrintParameters(model As LogisticRegressionModel)
            Console.WriteLine(ParameterHeader)
            For Each parameter As Parameter In model.Parameters
                ' Parameter objects have the following properties:
                ' Name, usually the name of the variable:
                ' Estimated value of the parameter:
                ' Standard error:
                ' The value of the t statistic for the hypothesis that the parameter is zero.
                ' Probability corresponding to the t statistic.
                Console.WriteLine(ParameterFormat,
                    parameter.Name,
                    parameter.Value,
                    parameter.StandardError,
                    parameter.Statistic,
                    parameter.PValue)
            Next
        End Sub

        ' Reads the data from a whitespace-delimited text file into a
        ' <see cref="DataTable"/>. The first line of the file supplies the
        ' column names (every column is created with type Double); each
        ' following line, up to the first empty line or the end of the file,
        ' becomes one row.
        ' Returns the populated DataTable, or Nothing if the data file
        ' could not be found.
        Public Function ReadData() As DataTable
            Dim data As DataTable = New DataTable("savings")

            ' Fields are separated by runs of spaces and/or tabs.
            Dim whitespace As Char() = New Char() {" "c, Chr(9)}
            Dim sr As StreamReader = Nothing
            Try
                sr = New StreamReader("..\..\..\Data\lowbwt.txt")
            Catch ex As Exception When TypeOf ex Is FileNotFoundException OrElse TypeOf ex Is DirectoryNotFoundException
                ' A missing folder in the relative path raises
                ' DirectoryNotFoundException rather than FileNotFoundException;
                ' both mean the data file is not where we expect it.
                Console.WriteLine("The data file could not be found. Please verify that the path is correct.")
                Return Nothing
            End Try

            ' Using guarantees the file is closed even if parsing fails.
            Using sr
                ' Read the header and extract the field names.
                Dim header As String = sr.ReadLine()
                If header Is Nothing Then Return data
                For Each columnName As String In header.Split(whitespace, StringSplitOptions.RemoveEmptyEntries)
                    data.Columns.Add(columnName, GetType(Double))
                Next
                ' Without any columns there is nothing to put the rows into.
                If data.Columns.Count = 0 Then Return data

                ' Now read the data and add them to the table.
                Dim line As String = sr.ReadLine()
                Do While (Not (line Is Nothing) AndAlso line.Length > 0)
                    ' RemoveEmptyEntries makes leading, trailing and repeated
                    ' separators harmless.
                    Dim fields As String() = line.Split(whitespace, StringSplitOptions.RemoveEmptyEntries)
                    If fields.Length > 0 Then
                        ' A fresh array per row, so a short row never inherits
                        ' values from the previous one; fields beyond the
                        ' declared columns are ignored.
                        Dim rowData As Object() = New Object(data.Columns.Count - 1) {}
                        Dim fieldCount As Integer = Math.Min(fields.Length, rowData.Length)
                        For column As Integer = 0 To fieldCount - 1
                            If (column = 0) Then
                                ' The first field is handed to the table as text;
                                ' the table converts it to the column type.
                                rowData(column) = fields(column)
                            Else
                                ' Parse with the invariant culture so the result
                                ' does not depend on the machine's regional
                                ' settings.
                                rowData(column) = Double.Parse(fields(column),
                                    System.Globalization.CultureInfo.InvariantCulture)
                            End If
                        Next
                        data.Rows.Add(rowData)
                    End If

                    line = sr.ReadLine()
                Loop
            End Using
            Return data
        End Function
    End Module

End Namespace