Illustrates how to sort and filter data used for data analysis in Visual Basic.
Option Infer On
Imports Extreme.Data.Text
Imports Extreme.DataAnalysis
Imports Extreme.Mathematics
Imports Extreme.Statistics
' Illustrates sorting and filtering of data sets and variables.
Module SortingAndFiltering
Sub Main()
' The license is verified at runtime. We're using
' a demo license here. For more information, see
' https://www.extremeoptimization.com/trialkey
Extreme.License.Verify("Demo license")
' We load the data into a data frame with a DateTime row index
Dim timeSeries = DelimitedTextFile.ReadDataFrame(Of DateTime)(
"..\..\..\..\Data\MicrosoftStock.csv", "Date")
Dim dates = timeSeries.RowIndex
' The following are all equivalent ways of getting
' a strongly typed vector from a data frame
Dim open = timeSeries("Open").As(Of Double)()
Dim close = timeSeries.GetColumn("Close")
Dim high = timeSeries.GetColumn(Of Double)("High")
Dim low = CType(timeSeries("Low"), Vector(Of Double))
Dim volume = timeSeries("Volume").As(Of Double)()
' Let's print some basic statistics for the full data set:
Console.WriteLine("Total # observations: {0}", timeSeries.RowCount)
Console.WriteLine("Average volume: {0:F0}", volume.Mean())
Console.WriteLine("Total volume: {0:F0}", volume.Sum())
'
' Filtering
'
' Use the GetRows method to select subsets of rows.
' You can use a sequence of keys
Dim subset = timeSeries.GetRows(
{New DateTime(2000, 3, 1), New DateTime(2000, 3, 2)})
' When the index Is sorted, you can use a range
subset = timeSeries.GetRows(
New DateTime(2000, 1, 1), New DateTime(2010, 1, 1))
' Another option Is to use a boolean mask. Here we select
' observations where the close price was greater
' than the open price
Dim Filter = Vector.GreaterThan(close, open)
' Then we can use the GetRows method
subset = timeSeries.GetRows(Filter)
' Data Is now filtered
Console.WriteLine("Filtered # observations: {0}", subset.RowCount)
' Masks can be combined using logical operations
Dim volumeFilter = volume.Map(Function(x) 200000000.0 <= x And x < 300000000.0)
Console.WriteLine("Volume filtered #: {0}", volumeFilter.CountTrue())
Dim intersection = Vector.And(volumeFilter, Filter)
Dim union = Vector.Or(volumeFilter, Filter)
Dim negation = Vector.Not(Filter)
Console.WriteLine("Combined filtered #: {0}", intersection.CountTrue())
subset = timeSeries.GetRows(intersection)
' When the row index Is ordered, it Is possible
' to get the rows with the key nearest to the
' supplied keys
Dim startDate = New DateTime(2001, 1, 1, 3, 0, 0)
Dim offsetDates = Index.CreateDateRange(startDate,
100, Recurrence.Daily)
subset = timeSeries.GetNearestRows(offsetDates, Direction.Forward)
'
' Sorting
'
' The simplest way to sort data Is calling the Sort method
' with the name of the variable to sort on
Dim sortedSeries = timeSeries.SortBy("High", SortOrder.Descending)
Dim highSorted = sortedSeries.GetColumn("High")(New Range(0, 4))
Console.WriteLine("Largest 'High' values:")
Console.WriteLine(highSorted.ToString("F2"))
' If you just want the largest few items in a series,
' you can use the Top Or Bottom method
Console.WriteLine(high.Top(5).ToString("F2"))
Console.Write("Press any key to exit.")
Console.ReadLine()
End Sub
End Module