New Version 6.0!

Try it for free with our fully functional 60-day trial version.

Download now!

QuickStart Samples

Sorting and Filtering QuickStart Sample (Visual Basic)

Illustrates how to sort and filter data used for data analysis in Visual Basic.

C# code F# code IronPython code Back to QuickStart Samples

Option Infer On

Imports System.Data.OleDb

Imports Extreme.DataAnalysis
Imports Extreme.Mathematics
Imports Extreme.Statistics

Namespace Extreme.Numerics.QuickStart.VB
    ' Illustrates sorting and filtering of data sets and variables.
    Module SortingAndFiltering

        Sub Main()
            Dim timeSeries = LoadTimeSeriesData()
            Dim dates = timeSeries.RowIndex

            ' The following are all equivalent ways of getting
            ' a strongly typed vector from a data frame
            Dim open = timeSeries("Open").As(Of Double)()
            Dim close = timeSeries.GetColumn("Close")
            Dim high = timeSeries.GetColumn(Of Double)("High")
            Dim low = CType(timeSeries("Low"), Vector(Of Double))

            Dim volume = timeSeries("Volume").As(Of Double)()

            ' Let's print some basic statistics for the full data set:
            Console.WriteLine("Total # observations: {0}", timeSeries.RowCount)
            Console.WriteLine("Average volume: {0:F0}", volume.Mean())
            Console.WriteLine("Total volume: {0:F0}", volume.Sum())

            '
            ' Filtering
            '

            ' Use the GetRows method to select subsets of rows.

            ' You can use a sequence of keys
            Dim subset = timeSeries.GetRows(
                {New DateTime(2000, 3, 1), New DateTime(2000, 3, 2)})

            ' When the index Is sorted, you can use a range
            subset = timeSeries.GetRows(
                New DateTime(2000, 1, 1), New DateTime(2010, 1, 1))

            ' Another option Is to use a boolean mask. Here we select
            ' observations where the close price was greater 
            ' than the open price
            Dim Filter = Vector.GreaterThan(close, open)
            ' Then we can use the GetRows method
            subset = timeSeries.GetRows(Filter)
            ' Data Is now filtered
            Console.WriteLine("Filtered # observations: {0}", subset.RowCount)

            ' Masks can be combined using logical operations
            Dim volumeFilter = volume.Map(Function(x) 200000000.0 <= x And x < 300000000.0)
            Console.WriteLine("Volume filtered #: {0}", volumeFilter.CountTrue())
            Dim intersection = Vector.And(volumeFilter, Filter)
            Dim union = Vector.Or(volumeFilter, Filter)
            Dim negation = Vector.Not(Filter)

            Console.WriteLine("Combined filtered #: {0}", intersection.CountTrue())
            subset = timeSeries.GetRows(intersection)

            ' When the row index Is ordered, it Is possible
            ' to get the rows with the key nearest to the 
            ' supplied keys
            Dim startDate = New DateTime(2001, 1, 1, 3, 0, 0)
            Dim offsetDates = Index.CreateDateRange(startDate,
                100, Recurrence.Daily)
            subset = timeSeries.GetNearestRows(offsetDates, Direction.Forward)

            '
            ' Sorting
            '

            ' The simplest way to sort data Is calling the Sort method 
            ' with the name of the variable to sort on
            Dim sortedSeries = timeSeries.SortBy("High", SortOrder.Descending)
            Dim highSorted = sortedSeries.GetColumn("High")(New Range(0, 4))
            Console.WriteLine("Largest 'High' values:")
            Console.WriteLine(highSorted.ToString("F2"))

            ' If you just want the largest few items in a series,
            ' you can use the Top Or Bottom method
            Console.WriteLine(high.Top(5).ToString("F2"))

            Console.Write("Press any key to exit.")
            Console.ReadLine()
        End Sub

        Private Function LoadTimeSeriesData() As DataFrame(Of Date, String)
            Dim filename As String = "..\..\..\Data\MicrosoftStock.xls"
            Dim connectionString As String = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" + filename + ";Extended Properties=""Excel 8.0;HDR=Yes;IMEX=1"""
            Dim cnn As OleDbConnection = Nothing
            Dim ds As DataSet = New DataSet
            Try
                cnn = New OleDbConnection(connectionString)
                cnn.Open()
                Dim adapter As OleDbDataAdapter = New OleDbDataAdapter("Select * from [MicrosoftStock$]", cnn)
                adapter.Fill(ds)
            Catch ex As OleDbException
                Console.WriteLine(ex.InnerException)

            Finally
                If Not (cnn Is Nothing) Then
                    cnn.Close()
                End If
            End Try
            Return DataFrame.FromDataTable(Of Date)(ds.Tables(0), "Date")
        End Function

    End Module

End Namespace