Skip to content

Commit c6c1331

Browse files
committed
ENH: basic downsampling with PeriodIndex, GH pandas-dev#1109
1 parent 839d991 commit c6c1331

File tree

4 files changed

+60 -26 lines changed

pandas/core/generic.py

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
# pylint: disable=W0231
1+
# pylint: disable=W0231,E1101
22
from datetime import timedelta
33

44
import numpy as np
@@ -182,12 +182,11 @@ def resample(self, rule, how='mean', axis=0, as_index=True,
182182
"""
183183
from pandas.tseries.resample import TimeGrouper
184184

185-
idx = self._get_axis(axis)
186-
if not isinstance(idx, DatetimeIndex):
187-
raise ValueError("Cannot call resample with non-DatetimeIndex")
185+
if axis != 0:
186+
raise NotImplementedError
188187

189188
grouper = TimeGrouper(rule, label=label, closed=closed,
190-
axis=self.index, kind=kind)
189+
axis=self._get_axis(axis), kind=kind)
191190

192191
# since binner extends endpoints
193192
if grouper.downsamples:

pandas/tseries/period.py

Lines changed: 15 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@ def _field_accessor(name, alias=None):
3535
def f(self):
3636
base, mult = _gfc(self.freq)
3737
g = getattr(lib, 'get_period_%s_arr' % alias)
38-
return g(self.ordinal, base, mult)
38+
return g(self.values, base, mult)
3939
f.__name__ = name
4040
return property(f)
4141

@@ -580,26 +580,25 @@ def asfreq(self, freq=None, how='E'):
580580
else:
581581
base2, mult2 = freq
582582

583-
584-
new_data = lib.period_asfreq_arr(self.values,
585-
base1, mult1,
586-
base2, mult2, py3compat.str_to_bytes(how))
583+
new_data = lib.period_asfreq_arr(self.values, base1, mult1,
584+
base2, mult2,
585+
py3compat.str_to_bytes(how))
587586

588587
return PeriodIndex(new_data, freq=freq)
589588

590-
year = _period_field_accessor('year')
591-
month = _period_field_accessor('month')
592-
day = _period_field_accessor('day')
593-
hour = _period_field_accessor('hour')
594-
minute = _period_field_accessor('minute')
595-
second = _period_field_accessor('second')
596-
weekofyear = _period_field_accessor('week')
589+
year = _field_accessor('year')
590+
month = _field_accessor('month')
591+
day = _field_accessor('day')
592+
hour = _field_accessor('hour')
593+
minute = _field_accessor('minute')
594+
second = _field_accessor('second')
595+
weekofyear = _field_accessor('week')
597596
week = weekofyear
598-
dayofweek = _period_field_accessor('dayofweek', 'dow')
597+
dayofweek = _field_accessor('dayofweek', 'dow')
599598
weekday = dayofweek
600-
dayofyear = day_of_year = _period_field_accessor('dayofyear', 'doy')
601-
quarter = _period_field_accessor('quarter')
602-
qyear = _period_field_accessor('qyear')
599+
dayofyear = day_of_year = _field_accessor('dayofyear', 'doy')
600+
quarter = _field_accessor('quarter')
601+
qyear = _field_accessor('qyear')
603602

604603
# Try to run function on index first, and then on elements of index
605604
# Especially important for group-by functionality

pandas/tseries/resample.py

Lines changed: 22 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
from pandas.tseries.frequencies import to_offset
55
from pandas.tseries.index import DatetimeIndex, date_range
66
from pandas.tseries.offsets import DateOffset
7-
from pandas.tseries.period import PeriodIndex
7+
from pandas.tseries.period import PeriodIndex, period_range
88
from pandas.util.decorators import cache_readonly
99
import pandas.core.common as com
1010

@@ -43,7 +43,7 @@ class TimeGrouper(BinGrouper):
4343
def __init__(self, offset='Min', closed='left', label='left',
4444
begin=None, end=None, nperiods=None, axis=None,
4545
kind=None):
46-
self.offset = offset
46+
self.freq = offset
4747
self.closed = closed
4848
self.label = label
4949
self.begin = begin
@@ -61,6 +61,9 @@ def set_axis(self, axis):
6161
if id(self.axis) == id(axis):
6262
return
6363

64+
if not isinstance(axis, (DatetimeIndex, PeriodIndex)):
65+
raise ValueError('Only valid with DatetimeIndex or PeriodIndex')
66+
6467
self.axis = axis
6568

6669
if len(self.axis) < 1:
@@ -100,18 +103,32 @@ def _group_timestamps(self):
100103
return binner, bins, labels
101104
elif self.kind == 'period':
102105
index = PeriodIndex(start=self.axis[0], end=self.axis[-1],
103-
freq=self.offset)
106+
freq=self.freq)
104107

105108
end_stamps = (index + 1).asfreq('D', 's').to_timestamp()
106109
bins = self.axis.searchsorted(end_stamps, side='left')
107110

108111
return index, bins, index
109112

110113
def _group_periods(self):
111-
raise NotImplementedError
114+
if self.kind is None or self.kind == 'period':
115+
# Start vs. end of period
116+
memb = self.axis.asfreq(self.freq)
117+
118+
if len(memb) > 1:
119+
rng = np.arange(memb.values[0], memb.values[-1] + 1)
120+
bins = memb.searchsorted(rng, side='right')
121+
else:
122+
bins = np.array([], dtype=np.int32)
123+
124+
index = period_range(memb[0], memb[-1], freq=self.freq)
125+
return index, bins, index
126+
else:
127+
# Convert to timestamps
128+
pass
112129

113130
def _generate_time_binner(self):
114-
offset = self.offset
131+
offset = self.freq
115132
if isinstance(offset, basestring):
116133
offset = to_offset(offset)
117134

pandas/tseries/tests/test_resample.py

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -263,10 +263,29 @@ def test_asfreq_non_unique(self):
263263

264264
self.assertRaises(Exception, ts.asfreq, 'B')
265265

266+
266267
def _simple_ts(start, end, freq='D'):
267268
rng = date_range(start, end, freq=freq)
268269
return Series(np.random.randn(len(rng)), index=rng)
269270

271+
def _simple_pts(start, end, freq='D'):
272+
rng = period_range(start, end, freq=freq)
273+
return Series(np.random.randn(len(rng)), index=rng)
274+
275+
276+
class TestResamplePeriodIndex(unittest.TestCase):
277+
278+
def test_basic_resample(self):
279+
ts = _simple_pts('1/1/1990', '6/30/1995', freq='M')
280+
281+
result = ts.resample('a-dec')
282+
expected = ts.groupby(ts.index.year).mean()
283+
expected.index = period_range('1/1/1990', '6/30/1995',
284+
freq='a-dec')
285+
assert_series_equal(result, expected)
286+
287+
def test_upsample_ffill(self):
288+
pass
270289

271290
class TestTimeGrouper(unittest.TestCase):
272291

0 commit comments

Comments
 (0)