Data Analysis Mathematics Linear Algebra Statistics
New Version 6.0!

Try it for free with our fully functional 60-day trial version.

QuickStart Samples

# Sorting and Filtering QuickStart Sample (Visual Basic)

Illustrates how to sort and filter data used for data analysis in Visual Basic.

```Option Infer On

Imports System.Data.OleDb

Imports Extreme.DataAnalysis
Imports Extreme.Mathematics
Imports Extreme.Statistics

Namespace Extreme.Numerics.QuickStart.VB
' Illustrates sorting and filtering of data sets and variables.
Module SortingAndFiltering

Sub Main()
Dim timeSeries = LoadTimeSeriesData()
Dim dates = timeSeries.RowIndex

' The following are all equivalent ways of getting
' a strongly typed vector from a data frame
Dim open = timeSeries("Open").As(Of Double)()
Dim close = timeSeries.GetColumn("Close")
Dim high = timeSeries.GetColumn(Of Double)("High")
Dim low = CType(timeSeries("Low"), Vector(Of Double))

Dim volume = timeSeries("Volume").As(Of Double)()

' Let's print some basic statistics for the full data set:
Console.WriteLine("Total # observations: {0}", timeSeries.RowCount)
Console.WriteLine("Average volume: {0:F0}", volume.Mean())
Console.WriteLine("Total volume: {0:F0}", volume.Sum())

'
' Filtering
'

' Use the GetRows method to select subsets of rows.

' You can use a sequence of keys
Dim subset = timeSeries.GetRows(
{New DateTime(2000, 3, 1), New DateTime(2000, 3, 2)})

' When the index Is sorted, you can use a range
subset = timeSeries.GetRows(
New DateTime(2000, 1, 1), New DateTime(2010, 1, 1))

' Another option Is to use a boolean mask. Here we select
' observations where the close price was greater
' than the open price
Dim Filter = Vector.GreaterThan(close, open)
' Then we can use the GetRows method
subset = timeSeries.GetRows(Filter)
' Data Is now filtered
Console.WriteLine("Filtered # observations: {0}", subset.RowCount)

' Masks can be combined using logical operations
Dim volumeFilter = volume.Map(Function(x) 200000000.0 <= x And x < 300000000.0)
Console.WriteLine("Volume filtered #: {0}", volumeFilter.CountTrue())
Dim intersection = Vector.And(volumeFilter, Filter)
Dim union = Vector.Or(volumeFilter, Filter)
Dim negation = Vector.Not(Filter)

Console.WriteLine("Combined filtered #: {0}", intersection.CountTrue())
subset = timeSeries.GetRows(intersection)

' When the row index Is ordered, it Is possible
' to get the rows with the key nearest to the
' supplied keys
Dim startDate = New DateTime(2001, 1, 1, 3, 0, 0)
Dim offsetDates = Index.CreateDateRange(startDate,
100, Recurrence.Daily)
subset = timeSeries.GetNearestRows(offsetDates, Direction.Forward)

'
' Sorting
'

' The simplest way to sort data Is calling the Sort method
' with the name of the variable to sort on
Dim sortedSeries = timeSeries.SortBy("High", SortOrder.Descending)
Dim highSorted = sortedSeries.GetColumn("High")(New Range(0, 4))
Console.WriteLine("Largest 'High' values:")
Console.WriteLine(highSorted.ToString("F2"))

' If you just want the largest few items in a series,
' you can use the Top Or Bottom method
Console.WriteLine(high.Top(5).ToString("F2"))

Console.Write("Press any key to exit.")
Console.ReadLine()
End Sub

Private Function LoadTimeSeriesData() As DataFrame(Of Date, String)
Dim filename As String = "..\..\..\Data\MicrosoftStock.xls"
Dim connectionString As String = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" + filename + ";Extended Properties=""Excel 8.0;HDR=Yes;IMEX=1"""
Dim cnn As OleDbConnection = Nothing
Dim ds As DataSet = New DataSet
Try
cnn = New OleDbConnection(connectionString)
cnn.Open()
Dim adapter As OleDbDataAdapter = New OleDbDataAdapter("Select * from [MicrosoftStock\$]", cnn)
adapter.Fill(ds)
Catch ex As OleDbException
Console.WriteLine(ex.InnerException)

Finally
If Not (cnn Is Nothing) Then
cnn.Close()
End If
End Try
Return DataFrame.FromDataTable(Of Date)(ds.Tables(0), "Date")
End Function

End Module

End Namespace```