New Version 6.0!

Try it for free with our fully functional 60-day trial version.

Download now!

QuickStart Samples

Sorting and Filtering QuickStart Sample (C#)

Illustrates how to sort and filter data used for data analysis in C#.

Visual Basic code F# code IronPython code Back to QuickStart Samples

using System;
using System.Data;
using System.Data.OleDb;

namespace Extreme.Numerics.QuickStart.CSharp
{
    using Extreme.DataAnalysis;
    using Extreme.Mathematics;
    using Extreme.Statistics;
    
    /// <summary>
    /// Illustrates sorting and filtering of data frames.
    /// </summary>
    class SortingAndFiltering
    {
        /// <summary>
        /// The main entry point for the application.
        /// </summary>
        [STAThread]
        static void Main(string[] args)
        {
            var timeSeries = LoadTimeSeriesData();
            var date = timeSeries.RowIndex;

            // The following are all equivalent ways of getting
            // a strongly typed vector from a data frame:
            var open = timeSeries["Open"].As<double>();
            var close = timeSeries.GetColumn("Close");
            var high = timeSeries.GetColumn<double>("High");
            var low = (Vector<double>)timeSeries["Low"];

            var volume = timeSeries["Volume"].As<double>();

            // Let's print some basic statistics for the full data set:
            Console.WriteLine("Total # observations: {0}", timeSeries.RowCount);
            Console.WriteLine("Average volume: {0:F0}", volume.Mean());
            Console.WriteLine("Total volume: {0:F0}", volume.Sum());

            //
            // Filtering
            //

            // Use the GetRows method to select subsets of rows.

            // You can use a sequence of keys:
            var subset = timeSeries.GetRows(new[] {
                    new DateTime(2000,3,1), new DateTime(2000,3,2) });

            // When the index is sorted, you can use a range:
            subset = timeSeries.GetRows(
                new DateTime(2000, 1, 1), new DateTime(2010, 1, 1));

            // Another option is to use a boolean mask. Here we select
            // observations where the close price was greater 
            // than the open price:
            var filter = Vector.GreaterThan(close, open);
            // Then we can use the GetRows method:
            subset = timeSeries.GetRows(filter);
            // Data is now filtered:
            Console.WriteLine("Filtered # observations: {0}", subset.RowCount);

            // Masks can be combined using logical operations:
            var volumeFilter = volume.Map(x => 200e+6 <= x && x < 300e+6);
            Console.WriteLine("Volume filtered #: {0}", volumeFilter.CountTrue());
            var intersection = Vector.And(volumeFilter, filter);
            var union = Vector.Or(volumeFilter, filter);
            var negation = Vector.Not(filter);

            Console.WriteLine("Combined filtered #: {0}", intersection.CountTrue());
            subset = timeSeries.GetRows(intersection);

            // When the row index is ordered, it is possible
            // to get the rows with the key nearest to the 
            // supplied keys:
            var startDate = new DateTime(2001, 1, 1, 3, 0, 0);
            var offsetDates = Index.CreateDateRange(startDate,
                100, Recurrence.Daily);
            subset = timeSeries.GetNearestRows(offsetDates, Direction.Forward);

            //
            // Sorting
            //

            // The simplest way to sort data is calling the Sort method 
            // with the name of the variable to sort on:
            var sortedSeries = timeSeries.SortBy("High", SortOrder.Descending);
            var sortedHigh = sortedSeries.GetColumn("High")[new Range(0, 4)];
            Console.WriteLine("Largest 'High' values:");
            Console.WriteLine(sortedHigh.ToString("F2"));

            // If you just want the largest few items in a series,
            // you can use the Top Or Bottom method
            Console.WriteLine(high.Top(5).ToString("F2"));

            Console.Write("Press any key to exit.");
            Console.ReadLine();
        }

        static DataFrame<DateTime, string> LoadTimeSeriesData()
        {
            string filename = @"..\..\..\..\Data\MicrosoftStock.xls";
            string connectionString = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source="+filename+";Extended Properties=\"Excel 8.0;HDR=Yes;IMEX=1\"";
            OleDbConnection cnn = null;
            DataSet ds = new DataSet();
            try
            {
                cnn = new OleDbConnection(connectionString);
                cnn.Open();
                OleDbDataAdapter adapter = new OleDbDataAdapter("Select * from [MicrosoftStock$]", cnn);
                adapter.Fill(ds);
            }
            catch (OleDbException ex)
            {
                Console.WriteLine(ex.InnerException);
            }
            finally
            {
                if (cnn != null)
                    cnn.Close();
            }
            return DataFrame.FromDataTable<DateTime>(ds.Tables[0], "Date");
        }
    }
}