Skip Navigation Links

Try it now

Latest version 8.1.20 (August 2023)

Try it for free with our fully functional 30-day trial.

nuget Get from
Nuget

QuickStart Samples

Sorting and Filtering QuickStart Sample (IronPython)

Illustrates how to sort and filter data used for data analysis in IronPython.

View this sample in: C# Visual Basic F#

```Python
import numerics

import clr
clr.AddReference("System.Data")
from System.Data import *
from System.Data.OleDb import *

# Variable classes reside in the Extreme.Statistics namespace.
from Extreme.Statistics import *
    
# Illustrates sorting and filtering of data sets and variables.

def LoadTimeSeriesData():
	filename = r"..\Data\MicrosoftStock.xls"
	connectionString = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source="+filename+";Extended Properties='Excel 8.0;HDR=Yes;IMEX=1'"
	cnn = None
	ds = DataSet()
	try:
		cnn = OleDbConnection(connectionString)
		cnn.Open()
		adapter = OleDbDataAdapter("Select * from [MicrosoftStock$]", cnn)
		adapter.Fill(	ds)
	except ex as OleDbException:
		print ex.InnerException
	finally:
		if cnn != None:
			cnn.Close()
	return ds.Tables[0]

seriesTable = LoadTimeSeriesData()
timeSeries = VariableCollection(seriesTable)

# Next, we create some helper variables:
date = timeSeries["Date"]
high = timeSeries["High"]
low = timeSeries["Low"]
open = timeSeries["Open"]
close = timeSeries["Close"]
volume = timeSeries["Volume"]

# Let's print some basic statistics for the full data set:
print "Total # observations:", timeSeries.Observations.Count
print "Average volume: {0:.0f}".format(volume.Mean)
print "Total volume: {0:.0f}".format(volume.Sum)

#
# Filtering
#

# Next, we create a filter, selecting observations where the close price
# was greater than the open price:
filter = close.Filters.GreaterThan(open)
# and set the VariableCollection's Filter property.
timeSeries.Filter = filter

# Data is now filtered:
print "Filtered # observations:", timeSeries.Observations.Count
# Summary statistics apply only to the filtered data:
print "Average volume: {0:.0f}".format(volume.Mean)
print "Total volume: {0:.0f}".format(volume.Sum)

# Filters can be combined using set operations.
volumeFilter = volume.Filters.Between(200e+6, 300e+6)
print "Volume filtered #:", volumeFilter.FilteredLength
combinedFilter = Filter.Intersection(volumeFilter, filter)
# Alternatively: combinedFilter = volumeFilter & filter
print "Combined filtered #:", combinedFilter.FilteredLength
timeSeries.Filter = combinedFilter

#
# Sorting
#

# The simplest way to sort data is calling the Sort method 
# with the name of the variable to sort on:
timeSeries.Sort("High")
for i in range(timeSeries.Observations.Count):
    print "{0:8.2f}".format(high[i])
print 

# We can also create a CollectionSortOrder object that
# defines the sort order over multiple fields/variables.
# We first create an order with one variable:
ordering = CollectionSortOrder(timeSeries, "High", SortOrder.Ascending)
# We then use the Then method repeatedly to add more sort fields:
ordering = ordering.Then("Date", SortOrder.Descending)

# And call the Sort method with the CollectionSortOrder object:
timeSeries.Filter = high.Filters.Between(25.11, 25.15)
# Sort orders are automatically combined with filters.
timeSeries.Sort(ordering)

print "Sorted on High, Date (desc.)"
for i in range(timeSeries.Observations.Count):
    print "{0:8.2f} {1:11}".format(high[i], date[i].ToShortDateString())
print 

print "Sorted on High, Date"
timeSeries.Sort(CollectionSortOrder(high, SortOrder.Ascending) \
    .Then(date, SortOrder.Ascending))
for i in range(timeSeries.Observations.Count):
    print "{0:8.2f} {1:11}".format(high[i], date[i].ToShortDateString())
print 

# The sort order remains, even if we change the filter:
timeSeries.Filter = high.Filters.Between(25.21, 25.25)
print "Same sort order, different data"
for i in range(timeSeries.Observations.Count):
    print "{0:8.2f} {1:11}".format(high[i], date[i].ToShortDateString())
print 
```