New Version 7.0!

Try it for free with our fully functional 60-day trial version.

Download now!

Get from NuGet

QuickStart Samples

Cluster Analysis QuickStart Sample (Visual Basic)

Illustrates how to use the classes in the Extreme.Statistics.Multivariate namespace to perform hierarchical clustering and K-means clustering in Visual Basic.

C# code F# code IronPython code Back to QuickStart Samples

Option Infer On

Imports System.Linq

Imports Extreme.DataAnalysis
Imports Extreme.Mathematics
Imports Extreme.Statistics
Imports Extreme.Statistics.IO
Imports Extreme.Statistics.Multivariate

Namespace Extreme.Numerics.QuickStart.VB

    ''' <summary>
    ''' Demonstrates how to use classes that implement
    ''' hierarchical and K-means clustering.
    ''' </summary>
    Module ClusterAnalysis

        ''' <summary>
        ''' Entry point of the sample. Loads the companies dataset from a Stata file,
        ''' runs a hierarchical cluster analysis and a K-means cluster analysis on it,
        ''' and writes a selection of the results to the console.
        ''' </summary>
        Sub Main()
            ' This QuickStart Sample demonstrates how to run two
            ' common multivariate analysis techniques:
            ' hierarchical cluster analysis and K-means cluster analysis.
            '
            ' The classes used in this sample reside in the
            ' Extreme.Statistics.Multivariate namespace.

            ' First, our dataset, which is from
            '     Computer-Aided Multivariate Analysis, 4th Edition
            '     by A. A. Afifi, V. Clark and S. May, chapter 16
            '     See http://www.ats.ucla.edu/stat/Stata/examples/cama4/default.htm
            ' The path is relative to the working directory of the process.
            Dim frame = New StataFileReader("..\..\..\Data\companies.dta").ReadDataFrame()

            '
            ' Hierarchical cluster analysis
            '

            Console.WriteLine("Hierarchical clustering")

            ' Create the model:
            Dim columns = {"ror5", "de", "salesgr5", "eps5", "npm1", "pe", "payoutr1"}
            Dim hc = New HierarchicalClusterAnalysis(frame, columns)
            ' Alternatively, we could use a formula to specify the variables.
            ' This replaces the model created above; both forms are shown
            ' only to illustrate the two constructors.
            Dim formula = "ror5 + de + salesgr5 + eps5 + npm1 + pe + payoutr1"
            hc = New HierarchicalClusterAnalysis(frame, formula)

            ' Rescale the variables to their Z-scores before doing the analysis:
            hc.Standardize = True
            ' The linkage method defaults to centroid:
            hc.LinkageMethod = LinkageMethod.Centroid
            ' We could set the distance measure. We use the default:
            hc.DistanceMeasure = DistanceMeasures.SquaredEuclideanDistance

            ' Compute the model:
            hc.Compute()

            ' We can partition the cases into clusters:
            Dim partition As HierarchicalClusterCollection = hc.GetClusterPartition(5)
            ' Individual clusters are accessed through an index, or through enumeration.
            For Each cluster As HierarchicalCluster In partition
                Console.WriteLine("Cluster {0} has {1} members.", cluster.Index, cluster.Size)
            Next

            ' And get the indexes of the observations in a single cluster
            ' (here the cluster at index 3 of the partition):
            Dim indexes = partition(3).MemberIndexes
            Console.WriteLine("Number of items in cluster 3: {0}", indexes.Length)

            ' Get a variable that shows memberships.
            ' Only the observations from index 15 onward are printed.
            Dim memberships = partition.GetMemberships()
            For i As Integer = 15 To memberships.Length - 1
                Console.WriteLine("Observation {0} belongs to cluster {1}", i, memberships.GetLevelIndex(i))
            Next i

            ' A dendrogram is a graphical representation of the clustering in the form of a tree.
            ' You can get all the information you need to draw a dendrogram starting from
            ' the root node of the dendrogram:
            Dim root As DendrogramNode = hc.DendrogramRoot
            ' Position and DistanceMeasure give the x and y coordinates:
            Console.WriteLine("Root position: ({0:F4}, {1:F4})", root.Position, root.DistanceMeasure)
            ' The left and right children:
            Console.WriteLine("Position of left child: {0:F4}", root.LeftChild.Position)
            Console.WriteLine("Position of right child: {0:F4}", root.RightChild.Position)

            ' You can also get a filter that defines a sort order suitable for
            ' drawing the dendrogram. The result is not used further in this sample.
            Dim sortOrder = hc.GetDendrogramOrder()
            Console.WriteLine()

            '
            ' K-Means Clustering
            '

            Console.WriteLine("K-means clustering")

            ' Create the model. We need to specify the number of clusters up front:
            Dim kmc As New KMeansClusterAnalysis(frame, columns, 3)
            ' Rescale the variables to their Z-scores before doing the analysis:
            kmc.Standardize = True

            ' Compute the model:
            kmc.Compute()

            ' The Predictions property is a categorical vector that contains
            ' the cluster assignments.
            Dim predictions = kmc.Predictions
            ' The GetDistancesToCenters method returns a vector containing
            ' the distance of each observation to its center.
            Dim distances = kmc.GetDistancesToCenters()

            ' For example (only the observations from index 18 onward are printed):
            For i = 18 To predictions.Length - 1
                Console.WriteLine("Observation {0} belongs to cluster {1}, distance: {2:F4}.",
                    i, predictions(i), distances(i))
            Next

            ' You can use this to compute several statistics: split the distances
            ' by predicted cluster and summarize each group. The local is named
            ' clusterStats so that it does not share a name with the Descriptives type.
            Dim clusterStats = distances.SplitBy(predictions).
                Map(Function(x) New Descriptives(Of Double)(x))

            ' Individual clusters are accessed through an index, or through enumeration.
            For i = 0 To clusterStats.Length - 1
                Console.WriteLine("Cluster {0} has {1} members. Sum of squares: {2:F4}",
                    i, clusterStats(i).Count, clusterStats(i).SumOfSquares)
                Console.WriteLine("Center: {0:F4}", kmc.Clusters(i))
            Next

            ' The distances between clusters are also available
            Console.WriteLine(kmc.GetClusterDistances().ToString("F4"))

            ' You can get a filter for the observations in a single cluster.
            ' This uses the GetIndexes method of categorical vectors.
            Dim level1Indexes = kmc.Predictions.GetIndexes(1).ToArray()
            Console.WriteLine("Number of items in cluster 1: {0}", level1Indexes.Length)

            ' Console.ReadLine only returns once Enter is pressed, so the prompt says so.
            Console.Write("Press Enter to exit.")
            Console.ReadLine()
        End Sub

    End Module

End Namespace