New Version 6.0!

Try it for free with our fully functional 60-day trial version.

Download now!

QuickStart Samples

Grouping and Aggregation QuickStart Sample (Visual Basic)

Illustrates how to group data and how to compute aggregates over groups and entire datasets. in Visual Basic.

C# code F# code Back to QuickStart Samples

Option Infer On

Imports Extreme.DataAnalysis
Imports Extreme.Mathematics

Namespace Extreme.Numerics.QuickStart.CSharp

    ' <summary>
    ' Illustrates how to group data And how to compute aggregates 
    ' over groups And entire datasets.
    ' </summary>
    Module GroupingAndAggregation

        Sub Main()

            ' We work with the Titanic dataset
            Dim titanic = DataFrame.ReadCsv("..\..\..\..\data\titanic.csv")
            ' We'll use these columns often:
            Dim age = titanic.GetColumn("Age")
            Dim survived = titanic("Survived").As(Of Boolean)()
            ' We want to group by the passenger class,
            ' so we make this a categorical vector.
            Dim pclass = titanic("Pclass").AsCategorical()

            ' Aggregators And Aggregation

            ' The Aggregators class defines all common aggregator functions.
            ' The Aggregate method applies the aggregator
            ' to every column in the data frame
            Dim means = titanic.Aggregate(Aggregators.Mean)

            ' We can create custom aggregators. Here we compute
            ' the fraction of true values of a boolean vector
            Dim trueFraction = Aggregators.Create("survived",
                Function(b As Vector(Of Boolean)) CDbl(b.CountTrue()) / b.Count)
            Dim pctSurvived = survived.Aggregate(trueFraction)

            ' We can also compute more than one aggregate
            Dim descriptives = titanic.Aggregate(

            ' Aggregations can be applied to individual vectors
            Dim meanAge = age.Aggregate(Aggregators.Mean)

            ' Or to rows Or columns of a matrix
            Dim m = Matrix.CreateRandom(5, 8)
            Dim meanByRow = m.AggregateRows(Aggregators.Mean)
            Dim meanByColumn = m.AggregateColumns(Aggregators.Mean)

            ' Groupings

            ' By defining a grouping, we can compute the aggregate
            ' for each group.

            ' The simplest grouping Is by value, similar to 
            ' GROUP BY clauses in database queries.

            ' Let's get the average age by class:
            Dim ageByClass = age.AggregateBy(pclass, Aggregators.Mean)

            ' Grouping by quantile means we sort the values
            ' And divide the result into groups of the same size.
            Dim byQuantile = Grouping.ByQuantile(age, 5)
            Dim survivedByAgeGroup = survived.AggregateBy(byQuantile, trueFraction)
            Console.WriteLine("Survival rate by age group:")

            ' For the remainder we will use a vector with a DateTime index
            Dim x = Vector.CreateRandom(200)
            Dim dates = Index.CreateDateRange(New DateTime(2016, 1, 1), x.Length)
            x.Index = dates

            ' A partition Is a straight division of the data into equal groups
            Dim partition = Grouping.Partition(dates, 10,
                alignToEnd:=True, skipIncomplete:=True)
            Dim partitionAvg = x.AggregateBy(partition, Aggregators.Mean)
            Console.WriteLine("Avg. by partition:")

            ' Moving And expanding windows

            ' Moving Or rolling averages And related statistics 
            ' can be computed efficiently by using moving windows
            Dim window = Grouping.Window(dates, 20)
            Dim ma20 = x.AggregateBy(window, Aggregators.Mean)
            Console.WriteLine(ma20.GetSlice(0, 20))
            ' Moving standard deviation Is just as simple
            Dim mstd20 = x.AggregateBy(window, Aggregators.StandardDeviation)
            Console.WriteLine(mstd20.GetSlice(0, 20))

            ' Moving windows can have a fixed number of elements, as above,
            ' Or a fixed maximum width
            Dim window2 = Grouping.RangeWindow(dates, TimeSpan.FromDays(20))
            Dim ma20_2 = x.AggregateBy(window2, Aggregators.Mean)

            ' Expanding windows keep the starting point And move the end point
            ' forward in time
            Dim expanding = Grouping.ExpandingWindow(dates)
            Dim expAvg = x.AggregateBy(expanding, Aggregators.Mean)
            Console.WriteLine(expAvg.GetSlice(0, 10))

            ' Resampling

            ' Resampling means computing values for a series 
            ' with longer periods by aggregating over the values
            ' for shorter periods.

            ' We start by creating an index with the boundaries,
            ' in this case the 10th of each month.
            Dim months = Index.CreateDateRange(New DateTime(2016, 1, 10),
                12, Recurrence.Monthly)
            ' We then create the resampling grouping from this:
            ' Giving the Direction argument as Backward means that
            ' the last value in the time period Is used as the key
            ' for the group.
            Dim resampling1 = Grouping.Resample(dates, months, Direction.Backward)
            ' We can also obtain this grouping in one step            
            Dim resampling2 = Grouping.Resample(dates,
                Recurrence.Monthly.Day(10), Direction.Backward)
            Dim resampled = x.AggregateBy(resampling2, Aggregators.Mean)

            ' Pivot tables

            ' A pivot table Is a 2-dimensional grouping on two key columns.
            ' For this, we go back to the Titanic dataset, And we compute 
            ' the survival rate per class in a different way. We group
            ' by class And by whether the passenger survived
            Dim Pivot = Grouping.Pivot(
                titanic("Pclass").As(Of Integer)(),
                titanic("Survived").As(Of Boolean)())
            ' We can then get the # of elements in each group
            ' as a matrix, with rows indexed by class And columns
            ' indexed by survived
            Dim counts = Pivot.CountsMatrix()
            ' Scaling by the row sums gives us the fraction
            ' of survived/did Not survive for each class
            Dim fractions = counts.UnscaleRowsInPlace(counts.GetRowSums())

            Console.Write("Press any key to exit.")

        End Sub

    End Module

End Namespace