{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Basic event statistics\n", "**Scott Wales, CLEX CMS**\n", "\n", "We have a set of time series from multiple models, and we'd like to compute some basic statistics on where the values exceed some threshold." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy\n", "import xarray\n", "import pandas" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Sample data\n", "\n", "We use some random data as a sample, with dimensions 'model' and 'time'." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
<xarray.DataArray (model: 5, time: 10)>\n", "array([[-0.32756177, -0.26286746, 0.40030123, 0.35854755, 0.43113588,\n", " 0.19164228, -0.24890014, -0.16822426, 0.0288118 , 0.30851645],\n", " [ 0.02352896, 0.01700063, 0.40107743, 0.09688021, -0.34731748,\n", " 0.33646959, -0.22218415, 0.00075536, 0.41923523, 0.46450296],\n", " [-0.36606797, 0.13691949, -0.24407689, 0.26593951, -0.36044558,\n", " -0.09755949, 0.23047747, -0.30123992, 0.49773007, -0.48146704],\n", " [-0.28529412, -0.4137749 , -0.49678032, -0.20963922, -0.37296851,\n", " 0.32198888, -0.48008865, -0.35185213, -0.19133728, 0.48969387],\n", " [ 0.35537076, -0.34188436, -0.46208259, 0.41417063, -0.07029039,\n", " -0.39533084, 0.21379516, -0.18512164, -0.43433092, -0.29407475]])\n", "Coordinates:\n", " * time (time) datetime64[ns] 2001-01-01 2001-02-01 ... 2001-10-01\n", " * model (model) <U1 'A' 'B' 'C' 'D' 'E'
array([[-0.32756177, -0.26286746, 0.40030123, 0.35854755, 0.43113588,\n", " 0.19164228, -0.24890014, -0.16822426, 0.0288118 , 0.30851645],\n", " [ 0.02352896, 0.01700063, 0.40107743, 0.09688021, -0.34731748,\n", " 0.33646959, -0.22218415, 0.00075536, 0.41923523, 0.46450296],\n", " [-0.36606797, 0.13691949, -0.24407689, 0.26593951, -0.36044558,\n", " -0.09755949, 0.23047747, -0.30123992, 0.49773007, -0.48146704],\n", " [-0.28529412, -0.4137749 , -0.49678032, -0.20963922, -0.37296851,\n", " 0.32198888, -0.48008865, -0.35185213, -0.19133728, 0.48969387],\n", " [ 0.35537076, -0.34188436, -0.46208259, 0.41417063, -0.07029039,\n", " -0.39533084, 0.21379516, -0.18512164, -0.43433092, -0.29407475]])
array(['2001-01-01T00:00:00.000000000', '2001-02-01T00:00:00.000000000',\n", " '2001-03-01T00:00:00.000000000', '2001-04-01T00:00:00.000000000',\n", " '2001-05-01T00:00:00.000000000', '2001-06-01T00:00:00.000000000',\n", " '2001-07-01T00:00:00.000000000', '2001-08-01T00:00:00.000000000',\n", " '2001-09-01T00:00:00.000000000', '2001-10-01T00:00:00.000000000'],\n", " dtype='datetime64[ns]')
array(['A', 'B', 'C', 'D', 'E'], dtype='<U1')
<xarray.DataArray (model: 5)>\n", "array([4, 2, 6, 8, 7])\n", "Coordinates:\n", " * model (model) <U1 'A' 'B' 'C' 'D' 'E'
array([4, 2, 6, 8, 7])
array(['A', 'B', 'C', 'D', 'E'], dtype='<U1')
<xarray.DataArray (model: 5)>\n", "array([-0.32756177, -0.34731748, -0.48146704, -0.49678032, -0.46208259])\n", "Coordinates:\n", " * model (model) <U1 'A' 'B' 'C' 'D' 'E'
array([-0.32756177, -0.34731748, -0.48146704, -0.49678032, -0.46208259])
array(['A', 'B', 'C', 'D', 'E'], dtype='<U1')
<xarray.DataArray (model: 5)>\n", "array([0, 4, 9, 2, 2])\n", "Coordinates:\n", " * model (model) <U1 'A' 'B' 'C' 'D' 'E'
array([0, 4, 9, 2, 2])
array(['A', 'B', 'C', 'D', 'E'], dtype='<U1')
<xarray.DataArray 'time' (model: 5)>\n", "array(['2001-01-01T00:00:00.000000000', '2001-05-01T00:00:00.000000000',\n", " '2001-10-01T00:00:00.000000000', '2001-03-01T00:00:00.000000000',\n", " '2001-03-01T00:00:00.000000000'], dtype='datetime64[ns]')\n", "Coordinates:\n", " time (model) datetime64[ns] 2001-01-01 2001-05-01 ... 2001-03-01\n", " * model (model) <U1 'A' 'B' 'C' 'D' 'E'
array(['2001-01-01T00:00:00.000000000', '2001-05-01T00:00:00.000000000',\n", " '2001-10-01T00:00:00.000000000', '2001-03-01T00:00:00.000000000',\n", " '2001-03-01T00:00:00.000000000'], dtype='datetime64[ns]')
array(['2001-01-01T00:00:00.000000000', '2001-05-01T00:00:00.000000000',\n", " '2001-10-01T00:00:00.000000000', '2001-03-01T00:00:00.000000000',\n", " '2001-03-01T00:00:00.000000000'], dtype='datetime64[ns]')
array(['A', 'B', 'C', 'D', 'E'], dtype='<U1')
<xarray.DataArray 'time' (model: 5)>\n", "array(['2001-01-01T00:00:00.000000000', '2001-05-01T00:00:00.000000000',\n", " '2001-10-01T00:00:00.000000000', '2001-03-01T00:00:00.000000000',\n", " '2001-03-01T00:00:00.000000000'], dtype='datetime64[ns]')\n", "Coordinates:\n", " * model (model) <U1 'A' 'B' 'C' 'D' 'E'
array(['2001-01-01T00:00:00.000000000', '2001-05-01T00:00:00.000000000',\n", " '2001-10-01T00:00:00.000000000', '2001-03-01T00:00:00.000000000',\n", " '2001-03-01T00:00:00.000000000'], dtype='datetime64[ns]')
array(['A', 'B', 'C', 'D', 'E'], dtype='<U1')
<xarray.Dataset>\n", "Dimensions: (model: 5)\n", "Coordinates:\n", " * model (model) <U1 'A' 'B' 'C' 'D' 'E'\n", "Data variables:\n", " count (model) int64 4 2 6 8 7\n", " min (model) float64 -0.3276 -0.3473 -0.4815 -0.4968 -0.4621\n", " min_date (model) datetime64[ns] 2001-01-01 2001-05-01 ... 2001-03-01
array(['A', 'B', 'C', 'D', 'E'], dtype='<U1')
array([4, 2, 6, 8, 7])
array([-0.32756177, -0.34731748, -0.48146704, -0.49678032, -0.46208259])
array(['2001-01-01T00:00:00.000000000', '2001-05-01T00:00:00.000000000',\n", " '2001-10-01T00:00:00.000000000', '2001-03-01T00:00:00.000000000',\n", " '2001-03-01T00:00:00.000000000'], dtype='datetime64[ns]')
<xarray.Dataset>\n", "Dimensions: ()\n", "Coordinates:\n", " model <U1 'A'\n", "Data variables:\n", " count int64 4\n", " min float64 -0.3276\n", " min_date datetime64[ns] 2001-01-01
array('A', dtype='<U1')
array(4)
array(-0.32756177)
array('2001-01-01T00:00:00.000000000', dtype='datetime64[ns]')
\n", " | time | \n", "model | \n", "event_duration | \n", "
---|---|---|---|
0 | \n", "0 | \n", "0 | \n", "2 | \n", "
1 | \n", "1 | \n", "4 | \n", "2 | \n", "
2 | \n", "0 | \n", "3 | \n", "5 | \n", "
3 | \n", "4 | \n", "2 | \n", "2 | \n", "
4 | \n", "4 | \n", "4 | \n", "2 | \n", "
5 | \n", "6 | \n", "0 | \n", "2 | \n", "
6 | \n", "6 | \n", "3 | \n", "3 | \n", "
7 | \n", "7 | \n", "4 | \n", "3 | \n", "
\n", " | model | \n", "time | \n", "event_duration | \n", "min | \n", "min_date | \n", "
---|---|---|---|---|---|
0 | \n", "A | \n", "2001-01-01 | \n", "31 days | \n", "-0.32756177046349233 | \n", "2001-01-01 | \n", "
1 | \n", "E | \n", "2001-02-01 | \n", "28 days | \n", "-0.4620825909697166 | \n", "2001-03-01 | \n", "
2 | \n", "D | \n", "2001-01-01 | \n", "120 days | \n", "-0.49678032011641426 | \n", "2001-03-01 | \n", "
3 | \n", "C | \n", "2001-05-01 | \n", "31 days | \n", "-0.36044557563610247 | \n", "2001-05-01 | \n", "
4 | \n", "E | \n", "2001-05-01 | \n", "31 days | \n", "-0.395330843938117 | \n", "2001-06-01 | \n", "
5 | \n", "A | \n", "2001-07-01 | \n", "31 days | \n", "-0.2489001360755888 | \n", "2001-07-01 | \n", "
6 | \n", "D | \n", "2001-07-01 | \n", "62 days | \n", "-0.48008864559888853 | \n", "2001-07-01 | \n", "
7 | \n", "E | \n", "2001-08-01 | \n", "61 days | \n", "-0.4343309190867375 | \n", "2001-09-01 | \n", "