Data Analysis Mathematics Linear Algebra Statistics
New Version 8.1!

Supports .NET 6.0. Try it for free with our fully functional 60-day trial version.

QuickStart Samples

# Sorting and Filtering QuickStart Sample (C#)

Illustrates how to sort and filter data used for data analysis in C#.

```using System;
using System.Data;
using System.Data.OleDb;

namespace Extreme.Numerics.QuickStart.CSharp
{
using Extreme.DataAnalysis;
using Extreme.Mathematics;
using Extreme.Statistics;

/// <summary>
/// Illustrates sorting and filtering of data frames.
/// </summary>
class SortingAndFiltering
{
/// <summary>
/// The main entry point for the application.
/// </summary>
static void Main(string[] args)
{
var timeSeries = LoadTimeSeriesData();
var date = timeSeries.RowIndex;

// The following are all equivalent ways of getting
// a strongly typed vector from a data frame:
var open = timeSeries["Open"].As<double>();
var close = timeSeries.GetColumn("Close");
var high = timeSeries.GetColumn<double>("High");
var low = (Vector<double>)timeSeries["Low"];

var volume = timeSeries["Volume"].As<double>();

// Let's print some basic statistics for the full data set:
Console.WriteLine("Total # observations: {0}", timeSeries.RowCount);
Console.WriteLine("Average volume: {0:F0}", volume.Mean());
Console.WriteLine("Total volume: {0:F0}", volume.Sum());

//
// Filtering
//

// Use the GetRows method to select subsets of rows.

// You can use a sequence of keys:
var subset = timeSeries.GetRows(new[] {
new DateTime(2000,3,1), new DateTime(2000,3,2) });

// When the index is sorted, you can use a range:
subset = timeSeries.GetRows(
new DateTime(2000, 1, 1), new DateTime(2010, 1, 1));

// Another option is to use a boolean mask. Here we select
// observations where the close price was greater
// than the open price:
var filter = Vector.GreaterThan(close, open);
// Then we can use the GetRows method:
subset = timeSeries.GetRows(filter);
// Data is now filtered:
Console.WriteLine("Filtered # observations: {0}", subset.RowCount);

// Masks can be combined using logical operations:
var volumeFilter = volume.Map(x => 200e+6 <= x && x < 300e+6);
Console.WriteLine("Volume filtered #: {0}", volumeFilter.CountTrue());
var intersection = Vector.And(volumeFilter, filter);
var union = Vector.Or(volumeFilter, filter);
var negation = Vector.Not(filter);

Console.WriteLine("Combined filtered #: {0}", intersection.CountTrue());
subset = timeSeries.GetRows(intersection);

// When the row index is ordered, it is possible
// to get the rows with the key nearest to the
// supplied keys:
var startDate = new DateTime(2001, 1, 1, 3, 0, 0);
var offsetDates = Index.CreateDateRange(startDate,
100, Recurrence.Daily);
subset = timeSeries.GetNearestRows(offsetDates, Direction.Forward);

//
// Sorting
//

// The simplest way to sort data is calling the Sort method
// with the name of the variable to sort on:
var sortedSeries = timeSeries.SortBy("High", SortOrder.Descending);
var sortedHigh = sortedSeries.GetColumn("High")[new Range(0, 4)];
Console.WriteLine("Largest 'High' values:");
Console.WriteLine(sortedHigh.ToString("F2"));

// If you just want the largest few items in a series,
// you can use the Top Or Bottom method
Console.WriteLine(high.Top(5).ToString("F2"));

Console.Write("Press any key to exit.");
}

static DataFrame<DateTime, string> LoadTimeSeriesData()
{
string filename = @"..\..\..\..\Data\MicrosoftStock.xls";
string connectionString = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source="+filename+";Extended Properties=\"Excel 8.0;HDR=Yes;IMEX=1\"";
OleDbConnection cnn = null;
DataSet ds = new DataSet();
try
{
cnn = new OleDbConnection(connectionString);
cnn.Open();
}
catch (OleDbException ex)
{
Console.WriteLine(ex.InnerException);
}
finally
{
if (cnn != null)
cnn.Close();
}
return DataFrame.FromDataTable<DateTime>(ds.Tables[0], "Date");
}
}
}```