New Version 7.0!

Try it for free with our fully functional 60-day trial version.

Download now!

Get from Nuget

QuickStart Samples

Sorting and Filtering QuickStart Sample (Visual Basic)

Illustrates how to sort and filter data used for data analysis in Visual Basic.

C# code F# code IronPython code Back to QuickStart Samples

Option Infer On

Imports System.Data.OleDb

Imports Extreme.DataAnalysis
Imports Extreme.Mathematics
Imports Extreme.Statistics

Namespace Extreme.Numerics.QuickStart.VB
    ' Illustrates sorting and filtering of data sets and variables.
    Module SortingAndFiltering

        Sub Main()
            Dim timeSeries = LoadTimeSeriesData()
            Dim dates = timeSeries.RowIndex

            ' The following are all equivalent ways of getting
            ' a strongly typed vector from a data frame
            Dim open = timeSeries("Open").As(Of Double)()
            Dim close = timeSeries.GetColumn("Close")
            Dim high = timeSeries.GetColumn(Of Double)("High")
            Dim low = CType(timeSeries("Low"), Vector(Of Double))

            Dim volume = timeSeries("Volume").As(Of Double)()

            ' Let's print some basic statistics for the full data set:
            Console.WriteLine("Total # observations: {0}", timeSeries.RowCount)
            Console.WriteLine("Average volume: {0:F0}", volume.Mean())
            Console.WriteLine("Total volume: {0:F0}", volume.Sum())

            '
            ' Filtering
            '

            ' Use the GetRows method to select subsets of rows.

            ' You can use a sequence of keys
            Dim subset = timeSeries.GetRows(
                {New DateTime(2000, 3, 1), New DateTime(2000, 3, 2)})

            ' When the index Is sorted, you can use a range
            subset = timeSeries.GetRows(
                New DateTime(2000, 1, 1), New DateTime(2010, 1, 1))

            ' Another option Is to use a boolean mask. Here we select
            ' observations where the close price was greater 
            ' than the open price
            Dim Filter = Vector.GreaterThan(close, open)
            ' Then we can use the GetRows method
            subset = timeSeries.GetRows(Filter)
            ' Data Is now filtered
            Console.WriteLine("Filtered # observations: {0}", subset.RowCount)

            ' Masks can be combined using logical operations
            Dim volumeFilter = volume.Map(Function(x) 200000000.0 <= x And x < 300000000.0)
            Console.WriteLine("Volume filtered #: {0}", volumeFilter.CountTrue())
            Dim intersection = Vector.And(volumeFilter, Filter)
            Dim union = Vector.Or(volumeFilter, Filter)
            Dim negation = Vector.Not(Filter)

            Console.WriteLine("Combined filtered #: {0}", intersection.CountTrue())
            subset = timeSeries.GetRows(intersection)

            ' When the row index Is ordered, it Is possible
            ' to get the rows with the key nearest to the 
            ' supplied keys
            Dim startDate = New DateTime(2001, 1, 1, 3, 0, 0)
            Dim offsetDates = Index.CreateDateRange(startDate,
                100, Recurrence.Daily)
            subset = timeSeries.GetNearestRows(offsetDates, Direction.Forward)

            '
            ' Sorting
            '

            ' The simplest way to sort data Is calling the Sort method 
            ' with the name of the variable to sort on
            Dim sortedSeries = timeSeries.SortBy("High", SortOrder.Descending)
            Dim highSorted = sortedSeries.GetColumn("High")(New Range(0, 4))
            Console.WriteLine("Largest 'High' values:")
            Console.WriteLine(highSorted.ToString("F2"))

            ' If you just want the largest few items in a series,
            ' you can use the Top Or Bottom method
            Console.WriteLine(high.Top(5).ToString("F2"))

            Console.Write("Press any key to exit.")
            Console.ReadLine()
        End Sub

        Private Function LoadTimeSeriesData() As DataFrame(Of Date, String)
            Dim filename As String = "..\..\..\Data\MicrosoftStock.xls"
            Dim connectionString As String = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" + filename + ";Extended Properties=""Excel 8.0;HDR=Yes;IMEX=1"""
            Dim cnn As OleDbConnection = Nothing
            Dim ds As DataSet = New DataSet
            Try
                cnn = New OleDbConnection(connectionString)
                cnn.Open()
                Dim adapter As OleDbDataAdapter = New OleDbDataAdapter("Select * from [MicrosoftStock$]", cnn)
                adapter.Fill(ds)
            Catch ex As OleDbException
                Console.WriteLine(ex.InnerException)

            Finally
                If Not (cnn Is Nothing) Then
                    cnn.Close()
                End If
            End Try
            Return DataFrame.FromDataTable(Of Date)(ds.Tables(0), "Date")
        End Function

    End Module

End Namespace