From 7cbebaf981e45648358943e626edc25677c2104d Mon Sep 17 00:00:00 2001
From: Uddeshya Singh
Date: Thu, 21 Jun 2018 15:09:18 +0530
Subject: [PATCH 01/55] Update merging.rst (#21568)

---
 doc/source/merging.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/source/merging.rst b/doc/source/merging.rst
index 1161656731f88..4d7cd0bdadef7 100644
--- a/doc/source/merging.rst
+++ b/doc/source/merging.rst
@@ -279,7 +279,7 @@ need to be:
 Ignoring indexes on the concatenation axis
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-For ``DataFrame``s which don't have a meaningful index, you may wish to append
+For ``DataFrame`` s which don't have a meaningful index, you may wish to append
 them and ignore the fact that they may have overlapping indexes. To do this,
 use the ``ignore_index`` argument:
 
@@ -314,7 +314,7 @@ This is also a valid argument to :meth:`DataFrame.append`:
 Concatenating with mixed ndims
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-You can concatenate a mix of ``Series`` and ``DataFrame``s. The
+You can concatenate a mix of ``Series`` and ``DataFrame`` s. The
 ``Series`` will be transformed to ``DataFrame`` with the column name as
 the name of the ``Series``.
 

From 3b65b9572a1fc8a2b232544d4e194b7d9eacdaa6 Mon Sep 17 00:00:00 2001
From: gfyoung
Date: Mon, 11 Jun 2018 17:15:29 -0700
Subject: [PATCH 02/55] DOC: Add 0.23.2 whatsnew template (#21433)

(cherry picked from commit 879b15f3476d81d51f236d13684444579bafb8fd)
---
 doc/source/whatsnew/v0.23.2.txt | 82 +++++++++++++++++++++++++++++++++
 1 file changed, 82 insertions(+)
 create mode 100644 doc/source/whatsnew/v0.23.2.txt

diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt
new file mode 100644
index 0000000000000..ec2eddcfd4d41
--- /dev/null
+++ b/doc/source/whatsnew/v0.23.2.txt
@@ -0,0 +1,82 @@
+.. _whatsnew_0232:
+
+v0.23.2
+-------
+
+This is a minor bug-fix release in the 0.23.x series and includes some small regression fixes
+and bug fixes. We recommend that all users upgrade to this version.
+
+.. contents:: What's new in v0.23.2
+    :local:
+    :backlinks: none
+
+.. _whatsnew_0232.enhancements:
+
+New features
+~~~~~~~~~~~~
+
+
+.. _whatsnew_0232.deprecations:
+
+Deprecations
+~~~~~~~~~~~~
+
+-
+-
+
+.. _whatsnew_0232.performance:
+
+Performance Improvements
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+-
+-
+
+Documentation Changes
+~~~~~~~~~~~~~~~~~~~~~
+
+-
+-
+
+.. _whatsnew_0232.bug_fixes:
+
+Bug Fixes
+~~~~~~~~~
+
+-
+-
+
+Conversion
+^^^^^^^^^^
+
+-
+-
+
+Indexing
+^^^^^^^^
+
+-
+-
+
+I/O
+^^^
+
+-
+-
+
+Plotting
+^^^^^^^^
+
+-
+-
+
+Reshaping
+^^^^^^^^^
+
+-
+-
+
+Categorical
+^^^^^^^^^^^
+
+-

From 22c5145861fcf21567e46dcb7fb608b08cdd66a1 Mon Sep 17 00:00:00 2001
From: gfyoung
Date: Mon, 11 Jun 2018 17:16:36 -0700
Subject: [PATCH 03/55] MAINT: More friendly error msg on Index overflow
 (#21377)

* MAINT: More useful error msg on Index overflow

Display a more friendly error message when there is an
OverflowError during Index construction.

Partially addresses gh-15832.

* DOC: Clarify how Index.__new__ handles dtype

Partially addresses gh-15823.

(cherry picked from commit defdb34bafa3900069d399ce597c0abbd4a2b0cc)
---
 pandas/core/indexes/base.py       | 12 +++++++++++-
 pandas/tests/indexes/test_base.py |  7 +++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 90238af9b3632..5fdb8fc59deca 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -187,6 +187,9 @@ class Index(IndexOpsMixin, PandasObject):
     ----------
     data : array-like (1-dimensional)
     dtype : NumPy dtype (default: object)
+        If dtype is None, we find the dtype that best fits the data.
+        If an actual dtype is provided, we coerce to that dtype if it's safe.
+        Otherwise, an error will be raised.
     copy : bool
         Make a copy of input ndarray
     name : object
@@ -312,7 +315,14 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
             if is_integer_dtype(dtype):
                 inferred = lib.infer_dtype(data)
                 if inferred == 'integer':
-                    data = np.array(data, copy=copy, dtype=dtype)
+                    try:
+                        data = np.array(data, copy=copy, dtype=dtype)
+                    except OverflowError:
+                        # gh-15823: a more user-friendly error message
+                        raise OverflowError(
+                            "the elements provided in the data cannot "
+                            "all be casted to the dtype {dtype}"
+                            .format(dtype=dtype))
                 elif inferred in ['floating', 'mixed-integer-float']:
                     if isna(data).any():
                         raise ValueError('cannot convert float '
diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
index 1e4dd2921b3f5..19acfb294762c 100644
--- a/pandas/tests/indexes/test_base.py
+++ b/pandas/tests/indexes/test_base.py
@@ -455,6 +455,13 @@ def test_constructor_nonhashable_name(self, indices):
         tm.assert_raises_regex(TypeError, message,
                                indices.set_names, names=renamed)
 
+    def test_constructor_overflow_int64(self):
+        # see gh-15832
+        msg = ("the elements provided in the data cannot "
+               "all be casted to the dtype int64")
+        with tm.assert_raises_regex(OverflowError, msg):
+            Index([np.iinfo(np.uint64).max - 1], dtype="int64")
+
     def test_view_with_args(self):
 
         restricted = ['unicodeIndex', 'strIndex', 'catIndex', 'boolIndex',

From 191767168dfa21639d16a16319245969a8e974ad Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Tue, 12 Jun 2018 09:54:11 +0200
Subject: [PATCH 04/55] DOC: follow 0.23.1 template for 0.23.2 whatsnew
 (#21435)

(cherry picked from commit 1275f91b74d8a48671eb8e705807bf852a8806a8)
---
 doc/source/whatsnew/v0.23.2.txt | 36 +++++++++++++++-------------------
 1 file changed, 16 insertions(+), 20 deletions(-)

diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt
index ec2eddcfd4d41..c636e73fbd6c2 100644
--- a/doc/source/whatsnew/v0.23.2.txt
+++ b/doc/source/whatsnew/v0.23.2.txt
@@ -10,16 +10,11 @@ and bug fixes. We recommend that all users upgrade to this version.
     :local:
     :backlinks: none
 
-.. _whatsnew_0232.enhancements:
 
-New features
-~~~~~~~~~~~~
+.. _whatsnew_0232.fixed_regressions:
 
-
-.. _whatsnew_0232.deprecations:
-
-Deprecations
-~~~~~~~~~~~~
+Fixed Regressions
+~~~~~~~~~~~~~~~~~
 
 -
 -
@@ -43,40 +38,41 @@ Documentation Changes
 Bug Fixes
 ~~~~~~~~~
 
+**Groupby/Resample/Rolling**
+
 -
 -
 
-Conversion
-^^^^^^^^^^
+**Conversion**
+
 -
 -
 
-Indexing
-^^^^^^^^
+**Indexing**
 
 -
 -
 
-I/O
-^^^
+**I/O**
 
 -
 -
 
-Plotting
-^^^^^^^^
+**Plotting**
 
 -
 -
 
-Reshaping
-^^^^^^^^^
+**Reshaping**
 
 -
 -
 
-Categorical
-^^^^^^^^^^^
+**Categorical**
+
+-
+
+**Other**
 
 -

From 475c8bcfde52545b7f46d3035691f20487415160 Mon Sep 17 00:00:00 2001
From: alimcmaster1
Date: Wed, 13 Jun 2018 11:25:58 +0100
Subject: [PATCH 05/55] Fix tests fragile to PATH (#21453)

(cherry picked from commit 7a49449b8c95fed027af1da35970743f23a93dff)
---
 pandas/tests/plotting/test_converter.py | 3 ++-
 pandas/tests/test_downstream.py         | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/plotting/test_converter.py b/pandas/tests/plotting/test_converter.py
index 47cded19f5300..bb976a1e3e81c 100644
--- a/pandas/tests/plotting/test_converter.py
+++ b/pandas/tests/plotting/test_converter.py
@@ -1,4 +1,5 @@
 import subprocess
+import sys
 import pytest
 
 from datetime import datetime, date
@@ -27,7 +28,7 @@ def test_register_by_default(self):
                 "import pandas as pd; "
                 "units = dict(matplotlib.units.registry); "
                 "assert pd.Timestamp in units)'")
-        call = ['python', '-c', code]
+        call = [sys.executable, '-c', code]
         assert subprocess.check_call(call) == 0
 
     def test_warns(self):
diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py
index afd7993fefc70..cf98cff97669a 100644
--- a/pandas/tests/test_downstream.py
+++ b/pandas/tests/test_downstream.py
@@ -3,6 +3,7 @@
 Testing that we work in the downstream packages
 """
 import subprocess
+import sys
 
 import pytest
 import numpy as np  # noqa
@@ -57,7 +58,7 @@ def test_xarray(df):
 
 def test_oo_optimizable():
     # GH 21071
-    subprocess.check_call(["python", "-OO", "-c", "import pandas"])
+    subprocess.check_call([sys.executable, "-OO", "-c", "import pandas"])
 
 
 @tm.network

From d4c48aaadfa2a6cbf2375631101b79752504f004 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke
Date: Wed, 13 Jun 2018 03:51:41 -0700
Subject: [PATCH 06/55] BUG: Construct Timestamp with tz correctly near DST
 border (#21407)

(cherry picked from commit bc4ccd7dfaceb92ac2c6dc345c1bc4489407108f)
---
 doc/source/whatsnew/v0.23.2.txt               |  4 ++++
 pandas/_libs/tslibs/conversion.pyx            | 22 ++++---------------
 pandas/tests/frame/test_timezones.py          | 10 +++++++++
 .../indexes/datetimes/test_construction.py    |  9 ++++++++
 .../indexes/datetimes/test_date_range.py      | 14 ++++++++++++
 .../tests/scalar/timestamp/test_timestamp.py  |  8 +++++++
 6 files changed, 49 insertions(+), 18 deletions(-)

diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt
index c636e73fbd6c2..1de44ffeb4160 100644
--- a/doc/source/whatsnew/v0.23.2.txt
+++ b/doc/source/whatsnew/v0.23.2.txt
@@ -73,6 +73,10 @@ Bug Fixes
 
 -
 
+**Timezones**
+- Bug in :class:`Timestamp` and :class:`DatetimeIndex` where passing a :class:`Timestamp` localized after a DST transition would return a datetime before the DST transition (:issue:`20854`)
+- Bug in comparing :class:`DataFrame`s with tz-aware :class:`DatetimeIndex` columns with a DST transition that raised a ``KeyError`` (:issue:`19970`)
+
 **Other**
 
 -
diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx
index f4841e6abb7e8..3cbef82437544 100644
--- a/pandas/_libs/tslibs/conversion.pyx
+++ b/pandas/_libs/tslibs/conversion.pyx
@@ -347,25 +347,11 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz,
     if tz is not None:
         tz = maybe_get_tz(tz)
 
-        # sort of a temporary hack
        if ts.tzinfo is not None:
-            if hasattr(tz, 'normalize') and hasattr(ts.tzinfo, '_utcoffset'):
-                ts = tz.normalize(ts)
-                obj.value = pydatetime_to_dt64(ts, &obj.dts)
-                obj.tzinfo = ts.tzinfo
-            else:
-                # tzoffset
-                try:
-                    tz = ts.astimezone(tz).tzinfo
-                except:
-                    pass
-                obj.value = pydatetime_to_dt64(ts, &obj.dts)
-                ts_offset = get_utcoffset(ts.tzinfo, ts)
-                obj.value -= int(ts_offset.total_seconds() * 1e9)
-                tz_offset = get_utcoffset(tz, ts)
-                obj.value += int(tz_offset.total_seconds() * 1e9)
-                dt64_to_dtstruct(obj.value, &obj.dts)
-                obj.tzinfo = tz
+            # Convert the current timezone to the passed timezone
+            ts = ts.astimezone(tz)
+            obj.value = pydatetime_to_dt64(ts, &obj.dts)
+            obj.tzinfo = ts.tzinfo
         elif not is_utc(tz):
             ts = _localize_pydatetime(ts, tz)
             obj.value = pydatetime_to_dt64(ts, &obj.dts)
diff --git a/pandas/tests/frame/test_timezones.py b/pandas/tests/frame/test_timezones.py
index fa589a0aa4817..3956968173070 100644
--- a/pandas/tests/frame/test_timezones.py
+++ b/pandas/tests/frame/test_timezones.py
@@ -133,3 +133,13 @@ def test_frame_reset_index(self, tz):
         xp = df.index.tz
         rs = roundtripped.index.tz
         assert xp == rs
+
+    @pytest.mark.parametrize('tz', [None, 'America/New_York'])
+    def test_boolean_compare_transpose_tzindex_with_dst(self, tz):
+        # GH 19970
+        idx = date_range('20161101', '20161130', freq='4H', tz=tz)
+        df = DataFrame({'a': range(len(idx)), 'b': range(len(idx))},
+                       index=idx)
+        result = df.T == df.T
+        expected = DataFrame(True, index=list('ab'), columns=idx)
+        tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py
index dae69a86910af..b138b79caac76 100644
--- a/pandas/tests/indexes/datetimes/test_construction.py
+++ b/pandas/tests/indexes/datetimes/test_construction.py
@@ -469,6 +469,15 @@ def test_constructor_with_non_normalized_pytz(self, tz):
         result = DatetimeIndex(['2010'], tz=non_norm_tz)
         assert pytz.timezone(tz) is result.tz
 
+    def test_constructor_timestamp_near_dst(self):
+        # GH 20854
+        ts = [Timestamp('2016-10-30 03:00:00+0300', tz='Europe/Helsinki'),
+              Timestamp('2016-10-30 03:00:00+0200', tz='Europe/Helsinki')]
+        result = DatetimeIndex(ts)
+        expected = DatetimeIndex([ts[0].to_pydatetime(),
+                                  ts[1].to_pydatetime()])
+        tm.assert_index_equal(result, expected)
+
 
 class TestTimeSeries(object):
 
diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py
index 193804b66395b..ec37bbbcb6c02 100644
--- a/pandas/tests/indexes/datetimes/test_date_range.py
+++ b/pandas/tests/indexes/datetimes/test_date_range.py
@@ -278,6 +278,20 @@ def test_wom_len(self, periods):
         res = date_range(start='20110101', periods=periods, freq='WOM-1MON')
         assert len(res) == periods
 
+    def test_construct_over_dst(self):
+        # GH 20854
+        pre_dst = Timestamp('2010-11-07 01:00:00').tz_localize('US/Pacific',
+                                                               ambiguous=True)
+        pst_dst = Timestamp('2010-11-07 01:00:00').tz_localize('US/Pacific',
+                                                               ambiguous=False)
+        expect_data = [Timestamp('2010-11-07 00:00:00', tz='US/Pacific'),
+                       pre_dst,
+                       pst_dst]
+        expected = DatetimeIndex(expect_data)
+        result = date_range(start='2010-11-7', periods=3,
+                            freq='H', tz='US/Pacific')
+        tm.assert_index_equal(result, expected)
+
 
 class TestGenRangeGeneration(object):
 
diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py
index ab87d98fca8eb..4689c7bea626f 100644
--- a/pandas/tests/scalar/timestamp/test_timestamp.py
+++ b/pandas/tests/scalar/timestamp/test_timestamp.py
@@ -528,6 +528,14 @@ def test_disallow_setting_tz(self, tz):
         with pytest.raises(AttributeError):
             ts.tz = tz
 
+    @pytest.mark.parametrize('offset', ['+0300', '+0200'])
+    def test_construct_timestamp_near_dst(self, offset):
+        # GH 20854
+        expected = Timestamp('2016-10-30 03:00:00{}'.format(offset),
+                             tz='Europe/Helsinki')
+        result = Timestamp(expected, tz='Europe/Helsinki')
+        assert result == expected
+
 
 class TestTimestamp(object):
 

From 14e5f3d4e604a9abe1ebefe9a136b026add6a7fc Mon Sep 17 00:00:00 2001
From: Pietro Battiston
Date: Wed, 13 Jun 2018 15:24:01 +0200
Subject: [PATCH 07/55] BUG: fix get_indexer_non_unique with CategoricalIndex
 key (#21457)

closes #21448

(cherry picked from commit 576d5c6b76e039a411a7cc4c0de29813e2de0149)
---
 doc/source/whatsnew/v0.23.2.txt           |  2 +-
 pandas/core/indexes/base.py               |  3 +++
 pandas/core/indexes/category.py           |  7 ++++++-
 pandas/tests/categorical/test_indexing.py | 20 +++++++++++++++++++-
 4 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt
index 1de44ffeb4160..3e4326dea2ecc 100644
--- a/doc/source/whatsnew/v0.23.2.txt
+++ b/doc/source/whatsnew/v0.23.2.txt
@@ -51,7 +51,7 @@ Bug Fixes
 
 **Indexing**
 
--
+- Bug in :meth:`Index.get_indexer_non_unique` with categorical key (:issue:`21448`)
 -
 
 **I/O**
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 5fdb8fc59deca..a85a0ea88855c 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -31,6 +31,7 @@
     is_dtype_equal,
     is_dtype_union_equal,
     is_object_dtype,
+    is_categorical,
     is_categorical_dtype,
     is_interval_dtype,
     is_period_dtype,
@@ -3357,6 +3358,8 @@ def _filter_indexer_tolerance(self, target, indexer, tolerance):
     @Appender(_index_shared_docs['get_indexer_non_unique'] % _index_doc_kwargs)
     def get_indexer_non_unique(self, target):
         target = _ensure_index(target)
+        if is_categorical(target):
+            target = target.astype(target.dtype.categories.dtype)
         pself, ptarget = self._maybe_promote(target)
         if pself is not self or ptarget is not target:
             return pself.get_indexer_non_unique(ptarget)
diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
index 150eca32e229d..587090fa72def 100644
--- a/pandas/core/indexes/category.py
+++ b/pandas/core/indexes/category.py
@@ -598,7 +598,12 @@ def get_indexer_non_unique(self, target):
         target = ibase._ensure_index(target)
 
         if isinstance(target, CategoricalIndex):
-            target = target.categories
+            # Indexing on codes is more efficient if categories are the same:
+            if target.categories is self.categories:
+                target = target.codes
+                indexer, missing = self._engine.get_indexer_non_unique(target)
+                return _ensure_platform_int(indexer), missing
+            target = target.values
 
         codes = self.categories.get_indexer(target)
         indexer, missing = self._engine.get_indexer_non_unique(codes)
diff --git a/pandas/tests/categorical/test_indexing.py b/pandas/tests/categorical/test_indexing.py
index 9c27b1101e5ca..cf7b5cfa55882 100644
--- a/pandas/tests/categorical/test_indexing.py
+++ b/pandas/tests/categorical/test_indexing.py
@@ -5,7 +5,7 @@
 import numpy as np
 
 import pandas.util.testing as tm
-from pandas import Categorical, Index, PeriodIndex
+from pandas import Categorical, Index, CategoricalIndex, PeriodIndex
 
 from pandas.tests.categorical.common import TestCategorical
 
@@ -103,3 +103,21 @@ def f():
             s.categories = [1, 2]
 
         pytest.raises(ValueError, f)
+
+    # Combinations of sorted/unique:
+    @pytest.mark.parametrize("idx_values", [[1, 2, 3, 4], [1, 3, 2, 4],
+                                            [1, 3, 3, 4], [1, 2, 2, 4]])
+    # Combinations of missing/unique
+    @pytest.mark.parametrize("key_values", [[1, 2], [1, 5], [1, 1], [5, 5]])
+    @pytest.mark.parametrize("key_class", [Categorical, CategoricalIndex])
+    def test_get_indexer_non_unique(self, idx_values, key_values, key_class):
+        # GH 21448
+        key = key_class(key_values, categories=range(1, 5))
+        # Test for flat index and CategoricalIndex with same/different cats:
+        for dtype in None, 'category', key.dtype:
+            idx = Index(idx_values, dtype=dtype)
+            expected, exp_miss = idx.get_indexer_non_unique(key_values)
+            result, res_miss = idx.get_indexer_non_unique(key)
+
+            tm.assert_numpy_array_equal(expected, result)
+            tm.assert_numpy_array_equal(exp_miss, res_miss)

From 2272ef4d7d99018f6f570317f7ec3a3d0cd92580 Mon Sep 17 00:00:00 2001
From: Kalyan Gokhale <4734245+KalyanGokhale@users.noreply.github.com>
Date: Thu, 14 Jun 2018 15:53:14 +0530
Subject: [PATCH 08/55] CLN: Comparison methods for MultiIndex should have
 consistent behaviour for all nlevels (GH21149) (#21195)

(cherry picked from commit a8738ba69cd817f7d57c8c25957d2a59621e875f)
---
 doc/source/whatsnew/v0.23.2.txt    |  1 +
 pandas/core/indexes/base.py        |  3 ++-
 pandas/tests/indexes/test_multi.py | 17 +++++++++++++++++
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt
index 3e4326dea2ecc..0d3f9cb8dd3b6 100644
--- a/doc/source/whatsnew/v0.23.2.txt
+++ b/doc/source/whatsnew/v0.23.2.txt
@@ -52,6 +52,7 @@ Bug Fixes
 **Indexing**
 
 - Bug in :meth:`Index.get_indexer_non_unique` with categorical key (:issue:`21448`)
+- Bug in comparison operations for :class:`MultiIndex` where error was raised on equality / inequality comparison involving a MultiIndex with ``nlevels == 1`` (:issue:`21149`)
 -
 
 **I/O**
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index a85a0ea88855c..a2e237c8cc45d 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -97,7 +97,8 @@ def cmp_method(self, other):
         if needs_i8_conversion(self) and needs_i8_conversion(other):
             return self._evaluate_compare(other, op)
 
-        if is_object_dtype(self) and self.nlevels == 1:
+        from .multi import MultiIndex
+        if is_object_dtype(self) and not isinstance(self, MultiIndex):
             # don't pass MultiIndex
             with np.errstate(all='ignore'):
                 result = ops._comp_method_OBJECT_ARRAY(op, self.values, other)
diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py
index 182dbdf2cf4e4..df506ae9486ee 100644
--- a/pandas/tests/indexes/test_multi.py
+++ b/pandas/tests/indexes/test_multi.py
@@ -3295,3 +3295,20 @@ def test_duplicate_multiindex_labels(self):
         with pytest.raises(ValueError):
             ind.set_levels([['A', 'B', 'A', 'A', 'B'], [2, 1, 3, -2, 5]],
                            inplace=True)
+
+    def test_multiindex_compare(self):
+        # GH 21149
+        # Ensure comparison operations for MultiIndex with nlevels == 1
+        # behave consistently with those for MultiIndex with nlevels > 1
+
+        midx = pd.MultiIndex.from_product([[0, 1]])
+
+        # Equality self-test: MultiIndex object vs self
+        expected = pd.Series([True, True])
+        result = pd.Series(midx == midx)
+        tm.assert_series_equal(result, expected)
+
+        # Greater than comparison: MultiIndex object vs self
+        expected = pd.Series([False, False])
+        result = pd.Series(midx > midx)
+        tm.assert_series_equal(result, expected)

From e4e48f8f34adcf1fe6e37ead4cfd2b0b55547f74 Mon Sep 17 00:00:00 2001
From: Jeremy Schendel
Date: Fri, 15 Jun 2018 11:21:36 -0600
Subject: [PATCH 09/55] BUG: Fix Series.nlargest for integer boundary values
 (#21432)

(cherry picked from commit ec5956ed350d33ac2cee07bf9a24ea5315529443)
---
 doc/source/whatsnew/v0.23.2.txt       |   1 +
 pandas/conftest.py                    |  71 +++++++++
 pandas/core/algorithms.py             |   5 +-
 pandas/tests/frame/test_analytics.py  |  78 +++++-----
 pandas/tests/series/test_analytics.py | 209 ++++++++++++++++++++++++++
 5 files changed, 321 insertions(+), 43 deletions(-)

diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt
index 0d3f9cb8dd3b6..d839a72323c78 100644
--- a/doc/source/whatsnew/v0.23.2.txt
+++ b/doc/source/whatsnew/v0.23.2.txt
@@ -80,4 +80,5 @@ Bug Fixes
 
 **Other**
 
+- Bug in :meth:`Series.nlargest` for signed and unsigned integer dtypes when the minimum value is present (:issue:`21426`)
 -
diff --git a/pandas/conftest.py b/pandas/conftest.py
index d5f399c7cd63d..9d806a91f37f7 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -129,6 +129,14 @@ def join_type(request):
     return request.param
 
 
+@pytest.fixture(params=['nlargest', 'nsmallest'])
+def nselect_method(request):
+    """
+    Fixture for trying all nselect methods
+    """
+    return request.param
+
+
 @pytest.fixture(params=[None, np.nan, pd.NaT, float('nan'), np.float('NaN')])
 def nulls_fixture(request):
     """
@@ -170,3 +178,66 @@ def string_dtype(request):
     * 'U'
     """
     return request.param
+
+
+@pytest.fixture(params=["float32", "float64"])
+def float_dtype(request):
+    """
+    Parameterized fixture for float dtypes.
+
+    * float32
+    * float64
+    """
+
+    return request.param
+
+
+UNSIGNED_INT_DTYPES = ["uint8", "uint16", "uint32", "uint64"]
+SIGNED_INT_DTYPES = ["int8", "int16", "int32", "int64"]
+ALL_INT_DTYPES = UNSIGNED_INT_DTYPES + SIGNED_INT_DTYPES
+
+
+@pytest.fixture(params=SIGNED_INT_DTYPES)
+def sint_dtype(request):
+    """
+    Parameterized fixture for signed integer dtypes.
+
+    * int8
+    * int16
+    * int32
+    * int64
+    """
+
+    return request.param
+
+
+@pytest.fixture(params=UNSIGNED_INT_DTYPES)
+def uint_dtype(request):
+    """
+    Parameterized fixture for unsigned integer dtypes.
+
+    * uint8
+    * uint16
+    * uint32
+    * uint64
+    """
+
+    return request.param
+
+
+@pytest.fixture(params=ALL_INT_DTYPES)
+def any_int_dtype(request):
+    """
+    Parameterized fixture for any integer dtypes.
+
+    * int8
+    * uint8
+    * int16
+    * uint16
+    * int32
+    * uint32
+    * int64
+    * uint64
+    """
+
+    return request.param
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 88bc497f9f22d..bcde32696c1ff 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -1131,9 +1131,12 @@ def compute(self, method):
             return dropped[slc].sort_values(ascending=ascending).head(n)
 
         # fast method
-        arr, _, _ = _ensure_data(dropped.values)
+        arr, pandas_dtype, _ = _ensure_data(dropped.values)
        if method == 'nlargest':
             arr = -arr
+            if is_integer_dtype(pandas_dtype):
+                # GH 21426: ensure reverse ordering at boundaries
+                arr -= 1
 
         if self.keep == 'last':
             arr = arr[::-1]
diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py
index d1a4a5f615b86..90d7c46f7554f 100644
--- a/pandas/tests/frame/test_analytics.py
+++ b/pandas/tests/frame/test_analytics.py
@@ -12,7 +12,7 @@
 from numpy.random import randn
 import numpy as np
 
-from pandas.compat import lrange, product, PY35
+from pandas.compat import lrange, PY35
 from pandas import (compat, isna, notna, DataFrame, Series,
                     MultiIndex, date_range, Timestamp, Categorical,
                     _np_version_under1p12, _np_version_under1p15)
@@ -2240,54 +2240,49 @@ class TestNLargestNSmallest(object):
 
     # ----------------------------------------------------------------------
     # Top / bottom
-    @pytest.mark.parametrize(
-        'method, n, order',
-        product(['nsmallest', 'nlargest'], range(1, 11),
-                [['a'],
-                 ['c'],
-                 ['a', 'b'],
-                 ['a', 'c'],
-                 ['b', 'a'],
-                 ['b', 'c'],
-                 ['a', 'b', 'c'],
-                 ['c', 'a', 'b'],
-                 ['c', 'b', 'a'],
-                 ['b', 'c', 'a'],
-                 ['b', 'a', 'c'],
-
-                 # dups!
-                 ['b', 'c', 'c'],
-
-                 ]))
-    def test_n(self, df_strings, method, n, order):
+    @pytest.mark.parametrize('order', [
+        ['a'],
+        ['c'],
+        ['a', 'b'],
+        ['a', 'c'],
+        ['b', 'a'],
+        ['b', 'c'],
+        ['a', 'b', 'c'],
+        ['c', 'a', 'b'],
+        ['c', 'b', 'a'],
+        ['b', 'c', 'a'],
+        ['b', 'a', 'c'],
+
+        # dups!
+        ['b', 'c', 'c']])
+    @pytest.mark.parametrize('n', range(1, 11))
+    def test_n(self, df_strings, nselect_method, n, order):
         # GH10393
         df = df_strings
         if 'b' in order:
 
             error_msg = self.dtype_error_msg_template.format(
-                column='b', method=method, dtype='object')
+                column='b', method=nselect_method, dtype='object')
             with tm.assert_raises_regex(TypeError, error_msg):
-                getattr(df, method)(n, order)
+                getattr(df, nselect_method)(n, order)
         else:
-            ascending = method == 'nsmallest'
-            result = getattr(df, method)(n, order)
+            ascending = nselect_method == 'nsmallest'
+            result = getattr(df, nselect_method)(n, order)
             expected = df.sort_values(order, ascending=ascending).head(n)
             tm.assert_frame_equal(result, expected)
 
-    @pytest.mark.parametrize(
-        'method, columns',
-        product(['nsmallest', 'nlargest'],
-                product(['group'], ['category_string', 'string'])
-                ))
-    def test_n_error(self, df_main_dtypes, method, columns):
+    @pytest.mark.parametrize('columns', [
+        ('group', 'category_string'), ('group', 'string')])
+    def test_n_error(self, df_main_dtypes, nselect_method, columns):
         df = df_main_dtypes
+        col = columns[1]
         error_msg = self.dtype_error_msg_template.format(
-            column=columns[1], method=method, dtype=df[columns[1]].dtype)
+            column=col, method=nselect_method, dtype=df[col].dtype)
         # escape some characters that may be in the repr
         error_msg = (error_msg.replace('(', '\\(').replace(")", "\\)")
                      .replace("[", "\\[").replace("]", "\\]"))
         with tm.assert_raises_regex(TypeError, error_msg):
-            getattr(df, method)(2, columns)
+            getattr(df, nselect_method)(2, columns)
 
     def test_n_all_dtypes(self, df_main_dtypes):
         df = df_main_dtypes
@@ -2308,15 +2303,14 @@ def test_n_identical_values(self):
         expected = pd.DataFrame({'a': [1] * 3, 'b': [1, 2, 3]})
         tm.assert_frame_equal(result, expected)
 
-    @pytest.mark.parametrize(
-        'n, order',
-        product([1, 2, 3, 4, 5],
-                [['a', 'b', 'c'],
-                 ['c', 'b', 'a'],
-                 ['a'],
-                 ['b'],
-                 ['a', 'b'],
-                 ['c', 'b']]))
+    @pytest.mark.parametrize('order', [
+        ['a', 'b', 'c'],
+        ['c', 'b', 'a'],
+        ['a'],
+        ['b'],
+        ['a', 'b'],
+        ['c', 'b']])
+    @pytest.mark.parametrize('n', range(1, 6))
     def test_n_duplicate_index(self, df_duplicates, n, order):
         # GH 13412
 
diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py
index 6ea40329f4bc3..7a78b562ac1fa 100644
--- a/pandas/tests/series/test_analytics.py
+++ b/pandas/tests/series/test_analytics.py
@@ -1866,6 +1866,189 @@ def s_main_dtypes():
     return df
 
 
+class TestMode(object):
+
+    @pytest.mark.parametrize('dropna, expected', [
+        (True, Series([], dtype=np.float64)),
+        (False, Series([], dtype=np.float64))
+    ])
+    def test_mode_empty(self, dropna, expected):
+        s = Series([], dtype=np.float64)
+        result = s.mode(dropna)
+        tm.assert_series_equal(result, expected)
+
+    @pytest.mark.parametrize('dropna, data, expected', [
+        (True, [1, 1, 1, 2], [1]),
+        (True, [1, 1, 1, 2, 3, 3, 3], [1, 3]),
+        (False, [1, 1, 1, 2], [1]),
+        (False, [1, 1, 1, 2, 3, 3, 3], [1, 3]),
+    ])
+    @pytest.mark.parametrize(
+        'dt',
+        list(np.typecodes['AllInteger'] + np.typecodes['Float'])
+    )
+    def test_mode_numerical(self, dropna, data, expected, dt):
+        s = Series(data, dtype=dt)
+        result = s.mode(dropna)
+        expected = Series(expected, dtype=dt)
+        tm.assert_series_equal(result, expected)
+
+    @pytest.mark.parametrize('dropna, expected', [
+        (True, [1.0]),
+        (False, [1, np.nan]),
+    ])
+    def test_mode_numerical_nan(self, dropna, expected):
+        s = Series([1, 1, 2, np.nan, np.nan])
+        result = s.mode(dropna)
+        expected = Series(expected)
+        tm.assert_series_equal(result, expected)
+
+    @pytest.mark.parametrize('dropna, expected1, expected2, expected3', [
+        (True, ['b'], ['bar'], ['nan']),
+        (False, ['b'], [np.nan], ['nan'])
+    ])
+    def test_mode_str_obj(self, dropna, expected1, expected2, expected3):
+        # Test string and object types.
+        data = ['a'] * 2 + ['b'] * 3
+
+        s = Series(data, dtype='c')
+        result = s.mode(dropna)
+        expected1 = Series(expected1, dtype='c')
+        tm.assert_series_equal(result, expected1)
+
+        data = ['foo', 'bar', 'bar', np.nan, np.nan, np.nan]
+
+        s = Series(data, dtype=object)
+        result = s.mode(dropna)
+        expected2 = Series(expected2, dtype=object)
+        tm.assert_series_equal(result, expected2)
+
+        data = ['foo', 'bar', 'bar', np.nan, np.nan, np.nan]
+
+        s = Series(data, dtype=object).astype(str)
+        result = s.mode(dropna)
+        expected3 = Series(expected3, dtype=str)
+        tm.assert_series_equal(result, expected3)
+
+    @pytest.mark.parametrize('dropna, expected1, expected2', [
+        (True, ['foo'], ['foo']),
+        (False, ['foo'], [np.nan])
+    ])
+    def test_mode_mixeddtype(self, dropna, expected1, expected2):
+        s = Series([1, 'foo', 'foo'])
+        result = s.mode(dropna)
+        expected = Series(expected1)
+        tm.assert_series_equal(result, expected)
+
+        s = Series([1, 'foo', 'foo', np.nan, np.nan, np.nan])
+        result = s.mode(dropna)
+        expected = Series(expected2, dtype=object)
+        tm.assert_series_equal(result, expected)
+
+    @pytest.mark.parametrize('dropna, expected1, expected2', [
+        (True, ['1900-05-03', '2011-01-03', '2013-01-02'],
+               ['2011-01-03', '2013-01-02']),
+        (False, [np.nan], [np.nan, '2011-01-03', '2013-01-02']),
+    ])
+    def test_mode_datetime(self, dropna, expected1, expected2):
+        s = Series(['2011-01-03', '2013-01-02',
+                    '1900-05-03', 'nan', 'nan'], dtype='M8[ns]')
+        result = s.mode(dropna)
+        expected1 = Series(expected1, dtype='M8[ns]')
+        tm.assert_series_equal(result, expected1)
+
+        s = Series(['2011-01-03', '2013-01-02', '1900-05-03',
+                    '2011-01-03', '2013-01-02', 'nan', 'nan'],
+                   dtype='M8[ns]')
+        result = s.mode(dropna)
+        expected2 = Series(expected2, dtype='M8[ns]')
+        tm.assert_series_equal(result, expected2)
+
+    @pytest.mark.parametrize('dropna, expected1, expected2', [
+        (True, ['-1 days', '0 days', '1 days'], ['2 min', '1 day']),
+        (False, [np.nan], [np.nan, '2 min', '1 day']),
+    ])
+    def test_mode_timedelta(self, dropna, expected1, expected2):
+        # gh-5986: Test timedelta types.
+
+        s = Series(['1 days', '-1 days', '0 days', 'nan', 'nan'],
+                   dtype='timedelta64[ns]')
+        result = s.mode(dropna)
+        expected1 = Series(expected1, dtype='timedelta64[ns]')
+        tm.assert_series_equal(result, expected1)
+
+        s = Series(['1 day', '1 day', '-1 day', '-1 day 2 min',
+                    '2 min', '2 min', 'nan', 'nan'],
+                   dtype='timedelta64[ns]')
+        result = s.mode(dropna)
+        expected2 = Series(expected2, dtype='timedelta64[ns]')
+        tm.assert_series_equal(result, expected2)
+
+    @pytest.mark.parametrize('dropna, expected1, expected2, expected3', [
+        (True, Categorical([1, 2], categories=[1, 2]),
+         Categorical(['a'], categories=[1, 'a']),
+         Categorical([3, 1], categories=[3, 2, 1], ordered=True)),
+        (False, Categorical([np.nan], categories=[1, 2]),
+         Categorical([np.nan, 'a'], categories=[1, 'a']),
+         Categorical([np.nan, 3, 1], categories=[3, 2, 1], ordered=True)),
+    ])
+    def test_mode_category(self, dropna, expected1, expected2, expected3):
+        s = Series(Categorical([1, 2, np.nan, np.nan]))
+        result = s.mode(dropna)
+        expected1 = Series(expected1, dtype='category')
+        tm.assert_series_equal(result, expected1)
+
+        s = Series(Categorical([1, 'a', 'a', np.nan, np.nan]))
+        result = s.mode(dropna)
+        expected2 = Series(expected2, dtype='category')
+        tm.assert_series_equal(result, expected2)
+
+        s = Series(Categorical([1, 1, 2, 3, 3, np.nan, np.nan],
+                               categories=[3, 2, 1], ordered=True))
+        result = s.mode(dropna)
+        expected3 = Series(expected3, dtype='category')
+        tm.assert_series_equal(result, expected3)
+
+    @pytest.mark.parametrize('dropna, expected1, expected2', [
+        (True, [2**63], [1, 2**63]),
+        (False, [2**63], [1, 2**63])
+    ])
+    def test_mode_intoverflow(self, dropna, expected1, expected2):
+        # Test for uint64 overflow.
+        s = Series([1, 2**63, 2**63], dtype=np.uint64)
+        result = s.mode(dropna)
+        expected1 = Series(expected1, dtype=np.uint64)
+        tm.assert_series_equal(result, expected1)
+
+        s = Series([1, 2**63], dtype=np.uint64)
+        result = s.mode(dropna)
+        expected2 = Series(expected2, dtype=np.uint64)
+        tm.assert_series_equal(result, expected2)
+
+    @pytest.mark.skipif(not compat.PY3, reason="only PY3")
+    def test_mode_sortwarning(self):
+        # Check for the warning that is raised when the mode
+        # results cannot be sorted
+
+        expected = Series(['foo', np.nan])
+        s = Series([1, 'foo', 'foo', np.nan, np.nan])
+
+        with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
+            result = s.mode(dropna=False)
+            result = result.sort_values().reset_index(drop=True)
+
+        tm.assert_series_equal(result, expected)
+
+
+def assert_check_nselect_boundary(vals, dtype, method):
+    # helper function for 'test_boundary_{dtype}' tests
+    s = Series(vals, dtype=dtype)
+    result = getattr(s, method)(3)
+    expected_idxr = [0, 1, 2] if method == 'nsmallest' else [3, 2, 1]
+    expected = s.loc[expected_idxr]
+    tm.assert_series_equal(result, expected)
+
+
 class TestNLargestNSmallest(object):
 
     @pytest.mark.parametrize(
@@ -1950,6 +2133,32 @@ def test_n(self, n):
         expected = s.sort_values().head(n)
         assert_series_equal(result, expected)
 
+    def test_boundary_integer(self, nselect_method, any_int_dtype):
+        # GH 21426
+        dtype_info = np.iinfo(any_int_dtype)
+        min_val, max_val = dtype_info.min, dtype_info.max
+        vals = [min_val, min_val + 1, max_val - 1, max_val]
+        assert_check_nselect_boundary(vals, any_int_dtype, nselect_method)
+
+    def test_boundary_float(self, nselect_method, float_dtype):
+        # GH 21426
+        dtype_info = np.finfo(float_dtype)
+        min_val, max_val = dtype_info.min, dtype_info.max
+        min_2nd, max_2nd = np.nextafter(
+            [min_val, max_val], 0, dtype=float_dtype)
+        vals = [min_val, min_2nd, max_2nd, max_val]
+        assert_check_nselect_boundary(vals, float_dtype, nselect_method)
+
+    @pytest.mark.parametrize('dtype', ['datetime64[ns]', 'timedelta64[ns]'])
+    def test_boundary_datetimelike(self, nselect_method, dtype):
+        # GH 21426
+        # use int64 bounds and +1 to min_val since true minimum is NaT
+        # (include min_val/NaT at end to maintain same expected_idxr)
+        dtype_info = np.iinfo('int64')
+        min_val, max_val = dtype_info.min, dtype_info.max
+        vals = [min_val + 1, min_val + 2, max_val - 1, max_val, min_val]
+        assert_check_nselect_boundary(vals, dtype, nselect_method)
+
 
 class TestCategoricalSeriesAnalytics(object):
 

From e9ee3a10f8d2eb0ef927e7ad5007fac6d64217ae Mon Sep 17 00:00:00 2001
From: Pietro Battiston
Date: Mon, 18 Jun 2018 23:42:59 +0200
Subject: [PATCH 10/55] PERF: remove useless overrides (#21523)

closes #21522

(cherry picked from commit ea54d390ac69a4421f8e88810dd058e9894daf26)
---
 pandas/core/indexes/multi.py | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index fbcf06a28c1e5..c8332d762f7ef 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -852,14 +852,6 @@ def _has_complex_internals(self):
         # to disable groupby tricks
         return True
 
-    @cache_readonly
-    def is_monotonic(self):
-        """
-        return if the index is monotonic increasing (only equal or
-        increasing) values.
-        """
-        return self.is_monotonic_increasing
-
     @cache_readonly
     def is_monotonic_increasing(self):
         """
@@ -887,10 +879,6 @@ def is_monotonic_decreasing(self):
         # monotonic decreasing if and only if reverse is monotonic increasing
         return self[::-1].is_monotonic_increasing
 
-    @cache_readonly
-    def is_unique(self):
-        return not self.duplicated().any()
-
     @cache_readonly
     def _have_mixed_levels(self):
         """ return a boolean list indicated if we have mixed levels """

From 76551c2540a51c028193a16843b7e6d9fcbe47ba Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Mon, 18 Jun 2018 17:39:39 -0500
Subject: [PATCH 11/55] BUG: Timedelta.__bool__ (#21485)

Closes #21484

(cherry picked from commit d5a1232da14e86dea2b3db8b61741f3f9b56e55a)
---
 doc/source/whatsnew/v0.23.2.txt                 |  9 ++++++---
 pandas/_libs/tslibs/timedeltas.pyx              |  3 +++
 pandas/tests/scalar/timedelta/test_timedelta.py | 14 ++++++++++++++
 3 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt
index d839a72323c78..ea6d8620289f8 100644
--- a/doc/source/whatsnew/v0.23.2.txt
+++ b/doc/source/whatsnew/v0.23.2.txt
@@ -43,10 +43,13 @@ Bug Fixes
 -
 -
 
-**Conversion**
+**Timedelta**
 
--
+- Bug in :class:`Timedelta` where non-zero timedeltas shorter than 1 microsecond were considered False (:issue:`21484`)
+
+**Conversion**
+
+- Bug in :meth:`Series.nlargest` for signed and unsigned integer dtypes when the minimum value is present (:issue:`21426`)
 -
 
 **Indexing**
@@ -75,10 +78,10 @@ Bug Fixes
 -
 
 **Timezones**
+
 - Bug in :class:`Timestamp` and :class:`DatetimeIndex` where passing a :class:`Timestamp` localized after a DST transition would return a datetime before the DST transition (:issue:`20854`)
 - Bug in comparing :class:`DataFrame`s with tz-aware :class:`DatetimeIndex` columns with a DST transition that raised a ``KeyError`` (:issue:`19970`)
 
 **Other**
 
-- Bug in :meth:`Series.nlargest` for signed and unsigned integer dtypes when the minimum value is present (:issue:`21426`)
 -
diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx
index e2b0b33053f83..769f3ca5fa8bf 100644
--- a/pandas/_libs/tslibs/timedeltas.pyx
+++ b/pandas/_libs/tslibs/timedeltas.pyx
@@ -899,6 +899,9 @@ cdef class _Timedelta(timedelta):
     def __str__(self):
         return self._repr_base(format='long')
 
+    def __bool__(self):
+        return self.value != 0
+
     def isoformat(self):
         """
         Format Timedelta as ISO 8601 Duration like
diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py
index 205fdf49d3e91..6472bd4245622 100644
--- a/pandas/tests/scalar/timedelta/test_timedelta.py
+++ b/pandas/tests/scalar/timedelta/test_timedelta.py
@@ -588,3 +588,17 @@ def test_components(self):
         result = s.dt.components
         assert not result.iloc[0].isna().all()
         assert result.iloc[1].isna().all()
+
+
+@pytest.mark.parametrize('value, expected', [
+    (Timedelta('10S'), True),
+    (Timedelta('-10S'), True),
+    (Timedelta(10, unit='ns'), True),
+    (Timedelta(0, unit='ns'), False),
+    (Timedelta(-10, unit='ns'), True),
+    (Timedelta(None), True),
+    (pd.NaT, True),
+])
+def test_truthiness(value, expected):
+    # https://github.com/pandas-dev/pandas/issues/21484
+    assert bool(value) is expected

From eb6f3681557f61aca378dd81ad92ff09fb05ad15 Mon Sep 17 00:00:00 2001
From: David Krych
Date: Mon, 18 Jun 2018 18:43:27 -0400
Subject: [PATCH 12/55] BUG: Fix Index construction when given empty generator
 (#21470). (#21481)

(cherry picked from commit 076635ac3a33b819f4ae0fb1f95106bf8e4bf329)
---
 doc/source/whatsnew/v0.23.2.txt   |  3 ++-
 pandas/core/arrays/categorical.py |  5 ++---
 pandas/core/indexes/base.py       | 10 ++++++----
 pandas/tests/indexes/test_base.py | 19 +++++++++++--------
 4 files changed, 21 insertions(+), 16 deletions(-)

diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt
index ea6d8620289f8..2af89c15bb8fb 100644
--- a/doc/source/whatsnew/v0.23.2.txt
+++ b/doc/source/whatsnew/v0.23.2.txt
@@ -49,8 +49,9 @@ Bug Fixes
 
 **Conversion**
 
+- Bug in constructing :class:`Index` with an iterator or generator (:issue:`21470`)
 - Bug in :meth:`Series.nlargest` for signed and unsigned integer dtypes when the minimum value is present (:issue:`21426`)
--
+
 
 **Indexing**
 
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index a1a8f098b582e..b587a4c0bc722 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -3,7 +3,6 @@
 import numpy as np
 from warnings import warn
 import textwrap
-import types
 
 from pandas import compat
 from pandas.compat import u, lzip
@@ -28,7 +27,7 @@
     is_categorical,
     is_categorical_dtype,
     is_list_like, is_sequence,
-    is_scalar,
+    is_scalar, is_iterator,
     is_dict_like)
 
 from pandas.core.algorithms import factorize, take_1d, unique1d, take
@@ -2473,7 +2472,7 @@ def _convert_to_list_like(list_like):
     if isinstance(list_like, list):
         return list_like
     if (is_sequence(list_like) or isinstance(list_like, tuple) or
-            isinstance(list_like, types.GeneratorType)):
+            is_iterator(list_like)):
         return list(list_like)
     elif is_scalar(list_like):
         return [list_like]
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index a2e237c8cc45d..4dacec6a93c68 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -436,12 +436,14 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
         elif data is None or is_scalar(data):
             cls._scalar_data_error(data)
         else:
-            if tupleize_cols and is_list_like(data) and data:
+            if tupleize_cols and is_list_like(data):
+                # GH21470: convert iterable to list before determining if empty
                 if is_iterator(data):
                     data = list(data)
-                # we must be all tuples, otherwise don't construct
-                # 10697
-                if all(isinstance(e, tuple) for e in data):
+
+                if data and all(isinstance(e, tuple) for e in data):
+                    # we must be all tuples, otherwise don't construct
+                    # 10697
                     from .multi import MultiIndex
                     return MultiIndex.from_tuples(
                         data, names=name or kwargs.get('names'))
diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
index 19acfb294762c..a0d6907055a2e 100644
--- a/pandas/tests/indexes/test_base.py
+++ b/pandas/tests/indexes/test_base.py
@@ -419,21 +419,24 @@ def test_constructor_dtypes_timedelta(self, attr, klass):
             result = klass(list(values), dtype=dtype)
             tm.assert_index_equal(result, index)
 
-    def test_constructor_empty_gen(self):
-        skip_index_keys = ["repeats", "periodIndex", "rangeIndex",
-                           "tuples"]
-        for key, index in self.generate_index_types(skip_index_keys):
-            empty = index.__class__([])
-            assert isinstance(empty, index.__class__)
-            assert not len(empty)
+    @pytest.mark.parametrize("value", [[], iter([]), (x for x in [])])
+    @pytest.mark.parametrize("klass",
+                             [Index, Float64Index, Int64Index, UInt64Index,
+                              CategoricalIndex, DatetimeIndex, TimedeltaIndex])
+    def test_constructor_empty(self, value, klass):
+        empty = klass(value)
+        assert isinstance(empty, klass)
+        assert not len(empty)
 
     @pytest.mark.parametrize("empty,klass", [
         (PeriodIndex([], freq='B'), PeriodIndex),
+        (PeriodIndex(iter([]), freq='B'), PeriodIndex),
+        (PeriodIndex((x for x in []), freq='B'), PeriodIndex),
         (RangeIndex(step=1), pd.RangeIndex),
         (MultiIndex(levels=[[1, 2], ['blue', 'red']],
                     labels=[[], []]), MultiIndex)
    ])
-    def test_constructor_empty(self, empty, klass):
+    def test_constructor_empty_special(self, empty, klass):
         assert isinstance(empty, klass)
         assert not len(empty)
 

From 2292005d0e780036939a258d09c8a6db16ecdd74 Mon Sep 17 00:00:00 2001
From: Ming Li <14131823+minggli@users.noreply.github.com>
Date: Mon, 18 Jun 2018 23:45:25 +0100
Subject: [PATCH 13/55] BUG/REG: file-handle object handled incorrectly in
 to_csv (#21478)

(cherry picked from commit 91451cb7dbaaf6fb3f9bdfca73fe6adc2ee68cce)
---
 doc/source/whatsnew/v0.23.2.txt   |  2 +-
 pandas/io/common.py               |  4 +++
 pandas/io/formats/csvs.py         | 59 ++++++++++++++++++++-----------
 pandas/tests/frame/test_to_csv.py | 16 +++++----
 pandas/tests/series/test_io.py    | 18 +++++-----
 pandas/tests/test_common.py       | 34 +++++++++++++-----
 6 files changed, 87 insertions(+), 46 deletions(-)

diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt
index 2af89c15bb8fb..e3205aecee121 100644
--- a/doc/source/whatsnew/v0.23.2.txt
+++ b/doc/source/whatsnew/v0.23.2.txt
@@ -16,7 +16,7 @@ and bug fixes. We recommend that all users upgrade to this version.
 Fixed Regressions
 ~~~~~~~~~~~~~~~~~
 
--
+- Fixed regression in :meth:`to_csv` when handling file-like object incorrectly (:issue:`21471`)
 -
 
 .. _whatsnew_0232.performance:
diff --git a/pandas/io/common.py b/pandas/io/common.py
index a492b7c0b8e8e..ac9077f2db50e 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -445,6 +445,10 @@ def __init__(self, file, mode, compression=zipfile.ZIP_DEFLATED, **kwargs):
     def write(self, data):
         super(BytesZipFile, self).writestr(self.filename, data)
 
+    @property
+    def closed(self):
+        return self.fp is None
+
 
 class MMapWrapper(BaseIterator):
     """
diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py
index 7f660e2644fa4..60518f596e9af 100644
--- a/pandas/io/formats/csvs.py
+++ b/pandas/io/formats/csvs.py
@@ -5,11 +5,13 @@
 
 from __future__ import print_function
 
+import warnings
+
 import csv as csvlib
+from zipfile import ZipFile
 import numpy as np
 
 from pandas.core.dtypes.missing import notna
-from pandas.core.dtypes.inference import is_file_like
 from pandas.core.index import Index, MultiIndex
 from pandas import compat
 from pandas.compat import (StringIO, range, zip)
@@ -128,19 +130,31 @@ def save(self):
         else:
             encoding = self.encoding
 
-        # PR 21300 uses string buffer to receive csv writing and dump into
-        # file-like output with compression as option. GH 21241, 21118
-        f = StringIO()
-        if not is_file_like(self.path_or_buf):
-            # path_or_buf is path
-            path_or_buf = self.path_or_buf
-        elif hasattr(self.path_or_buf, 'name'):
-            # path_or_buf is file handle
-            path_or_buf = self.path_or_buf.name
-        else:
-            # path_or_buf is file-like IO objects.
+        # GH 21227 internal compression is not used when file-like passed.
+        if self.compression and hasattr(self.path_or_buf, 'write'):
+            msg = ("compression has no effect when passing file-like "
+                   "object as input.")
+            warnings.warn(msg, RuntimeWarning, stacklevel=2)
+
+        # when zip compression is called.
+        is_zip = isinstance(self.path_or_buf, ZipFile) or (
+            not hasattr(self.path_or_buf, 'write')
+            and self.compression == 'zip')
+
+        if is_zip:
+            # zipfile doesn't support writing string to archive. uses string
+            # buffer to receive csv writing and dump into zip compression
+            # file handle. GH 21241, 21118
+            f = StringIO()
+            close = False
+        elif hasattr(self.path_or_buf, 'write'):
             f = self.path_or_buf
-            path_or_buf = None
+            close = False
+        else:
+            f, handles = _get_handle(self.path_or_buf, self.mode,
+                                     encoding=encoding,
+                                     compression=self.compression)
+            close = True
 
         try:
             writer_kwargs = dict(lineterminator=self.line_terminator,
@@ -157,13 +171,18 @@ def save(self):
 
             self._save()
 
         finally:
-            # GH 17778 handles zip compression for byte strings separately.
-            buf = f.getvalue()
-            if path_or_buf:
-                f, handles = _get_handle(path_or_buf, self.mode,
-                                         encoding=encoding,
-                                         compression=self.compression)
-                f.write(buf)
+            if is_zip:
+                # GH 17778 handles zip compression separately.
+                buf = f.getvalue()
+                if hasattr(self.path_or_buf, 'write'):
+                    self.path_or_buf.write(buf)
+                else:
+                    f, handles = _get_handle(self.path_or_buf, self.mode,
+                                             encoding=encoding,
+                                             compression=self.compression)
+                    f.write(buf)
+                    close = True
+            if close:
                 f.close()
                 for _fh in handles:
                     _fh.close()
diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py
index 60dc336a85388..3ad25ae73109e 100644
--- a/pandas/tests/frame/test_to_csv.py
+++ b/pandas/tests/frame/test_to_csv.py
@@ -9,6 +9,7 @@
 import numpy as np
 
 from pandas.compat import (lmap, range, lrange, StringIO, u)
+from pandas.io.common import _get_handle
 import pandas.core.common as com
 from pandas.errors import ParserError
 from pandas import (DataFrame, Index, Series, MultiIndex, Timestamp,
@@ -935,18 +936,19 @@ def test_to_csv_compression(self, df, encoding, compression):
         with ensure_clean() as filename:
             df.to_csv(filename, compression=compression, encoding=encoding)
 
-            # test the round trip - to_csv -> read_csv
             result = read_csv(filename, compression=compression,
                               index_col=0, encoding=encoding)
+            assert_frame_equal(df, result)
 
-            with open(filename, 'w') as fh:
-                df.to_csv(fh, compression=compression, encoding=encoding)
-
-            result_fh = read_csv(filename, compression=compression,
-                                 index_col=0, encoding=encoding)
+            # test the round trip using file handle - to_csv -> read_csv
+            f, _handles = _get_handle(filename, 'w', compression=compression,
+                                      encoding=encoding)
+            with f:
+                df.to_csv(f, encoding=encoding)
+            result = pd.read_csv(filename, compression=compression,
+                                 encoding=encoding, index_col=0, squeeze=True)
             assert_frame_equal(df, result)
-            assert_frame_equal(df, result_fh)
 
             # explicitly make sure file is compressed
             with tm.decompress_file(filename, compression) as fh:
diff --git a/pandas/tests/series/test_io.py b/pandas/tests/series/test_io.py
index 76dd4bc1f3d4a..90f37053ce17e 100644
--- a/pandas/tests/series/test_io.py
+++ b/pandas/tests/series/test_io.py
@@ -11,6 +11,7 @@
 from pandas import Series, DataFrame
 
 from pandas.compat import StringIO, u
+from pandas.io.common import _get_handle
 from pandas.util.testing import (assert_series_equal, assert_almost_equal,
                                  assert_frame_equal, ensure_clean)
 import pandas.util.testing as tm
@@ -152,20 +153,19 @@ def test_to_csv_compression(self, s, encoding, compression):
             s.to_csv(filename, compression=compression, encoding=encoding,
                      header=True)
 
-            # test the round trip - to_csv -> read_csv
             result = pd.read_csv(filename, compression=compression,
                                  encoding=encoding, index_col=0,
                                  squeeze=True)
+            assert_series_equal(s, result)
 
-            with open(filename, 'w') as fh:
-                s.to_csv(fh, compression=compression, encoding=encoding,
-                         header=True)
-
-            result_fh = pd.read_csv(filename, compression=compression,
-                                    encoding=encoding, index_col=0,
-                                    squeeze=True)
+            # test the round trip using file handle - to_csv -> read_csv
+            f, _handles = _get_handle(filename, 'w', compression=compression,
+                                      encoding=encoding)
+            with f:
+                s.to_csv(f, encoding=encoding, header=True)
+            result = pd.read_csv(filename, compression=compression,
+                                 encoding=encoding, index_col=0, squeeze=True)
             assert_series_equal(s, result)
-            assert_series_equal(s, result_fh)
 
             # explicitly ensure file was compressed
             with tm.decompress_file(filename, compression) as fh:
diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py
index 3443331e3d4ba..576239e49455e 100644
--- a/pandas/tests/test_common.py
+++ b/pandas/tests/test_common.py
@@ -11,6 +11,7 @@
 from pandas.compat import range, lmap
 import pandas.core.common as com
 from pandas.core import ops
+from pandas.io.common import _get_handle
 import pandas.util.testing as tm
 
 
@@ -248,19 +249,34 @@ def test_compression_size(obj, method, compression):
                     [12.32112, 123123.2, 321321.2]],
                    columns=['X', 'Y', 'Z']),
     Series(100 * [0.123456, 0.234567, 0.567567], name='X')])
-@pytest.mark.parametrize('method', ['to_csv'])
+@pytest.mark.parametrize('method', ['to_csv', 'to_json'])
 def test_compression_size_fh(obj, method, compression_only):
 
     with tm.ensure_clean() as filename:
-        with open(filename, 'w') as fh:
-            getattr(obj, method)(fh, compression=compression_only)
-            assert not fh.closed
-        assert fh.closed
+        f, _handles = _get_handle(filename, 'w', compression=compression_only)
+        with f:
+            getattr(obj, method)(f)
+            assert not f.closed
+        assert f.closed
         compressed = os.path.getsize(filename)
     with tm.ensure_clean() as filename:
-        with open(filename, 'w') as fh:
-            getattr(obj, method)(fh, compression=None)
-            assert not fh.closed
-        assert fh.closed
+        f, _handles = _get_handle(filename, 'w', compression=None)
+        with f:
+            getattr(obj, method)(f)
+            assert not f.closed
+        assert f.closed
         uncompressed = os.path.getsize(filename)
         assert uncompressed > compressed
+
+
+# GH 21227
+def test_compression_warning(compression_only):
+    df = DataFrame(100 * [[0.123456, 0.234567, 0.567567],
+                          [12.32112, 123123.2, 321321.2]],
+                   columns=['X', 'Y', 'Z'])
+    with tm.ensure_clean() as filename:
+        f, _handles = _get_handle(filename, 'w', compression=compression_only)
+        with tm.assert_produces_warning(RuntimeWarning,
+                                        check_stacklevel=False):
+            with f:
+                df.to_csv(f, compression=compression_only)

From 030a0589cdc8479c65223669b5bbf0d10a95f31c Mon Sep 17 00:00:00 2001
From: Jacopo Rota
Date: Tue, 19 Jun 2018 13:26:48 +0200
Subject: [PATCH 14/55] BUG: Handle read_csv corner case (#21176)

Closes gh-21141

(cherry picked from commit c2da06c8eea4cc0339717aa09acdd6765bc3d673)
---
 doc/source/whatsnew/v0.23.2.txt  |  1 +
 pandas/io/parsers.py             | 12 +++++++++++-
 pandas/tests/io/parser/common.py | 15 +++++++++++++++
 3 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt
index e3205aecee121..f7c04ba9cfa9f 100644
--- a/doc/source/whatsnew/v0.23.2.txt
+++ b/doc/source/whatsnew/v0.23.2.txt
@@ -61,6 +61,7 @@ Bug Fixes
 
 **I/O**
 
+- Bug in :func:`read_csv` that caused it to incorrectly raise an error when ``nrows=0``, ``low_memory=True``, and ``index_col`` was not ``None`` (:issue:`21141`)
 -
 -
 
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 2c8f98732c92f..65df2bffb4abf 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -3209,12 +3209,22 @@ def _get_empty_meta(columns, index_col, index_names, dtype=None):
             col = columns[k] if is_integer(k) else k
             dtype[col] = v
 
-    if index_col is None or index_col is False:
+    # Even though we have no data, the "index" of the empty DataFrame
+    # could for example still be an empty MultiIndex. Thus, we need to
+    # check whether we have any index columns specified, via either:
+    #
+    # 1) index_col (column indices)
+    # 2) index_names (column names)
+    #
+    # Both must be non-null to ensure a successful construction. Otherwise,
+    # we have to create a generic emtpy Index.
+    if (index_col is None or index_col is False) or index_names is None:
         index = Index([])
     else:
         data = [Series([], dtype=dtype[name]) for name in index_names]
         index = _ensure_index_from_sequences(data, names=index_names)
         index_col.sort()
+
         for i, n in enumerate(index_col):
             columns.pop(n - i)
 
diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py
index 2b7ff1f5a9879..b39122e5e7906 100644
--- a/pandas/tests/io/parser/common.py
+++ b/pandas/tests/io/parser/common.py
@@ -238,6 +238,21 @@ def test_csv_mixed_type(self):
         out = self.read_csv(StringIO(data))
         tm.assert_frame_equal(out, expected)
 
+    def test_read_csv_low_memory_no_rows_with_index(self):
+        if self.engine == "c" and not self.low_memory:
+            pytest.skip("This is a low-memory specific test")
+
+        # see gh-21141
+        data = """A,B,C
+1,1,1,2
+2,2,3,4
+3,3,4,5
+"""
+        out = self.read_csv(StringIO(data), low_memory=True,
+                            index_col=0, nrows=0)
+        expected = DataFrame(columns=["A", "B", "C"])
+        tm.assert_frame_equal(out, expected)
+
     def test_read_csv_dataframe(self):
         df = self.read_csv(self.csv1, index_col=0, parse_dates=True)
         df2 = self.read_table(self.csv1, sep=',', index_col=0,

From d44fddb12ff0ff3991dfaa81b52d8f63b0f3d308 Mon Sep 17 00:00:00 2001
From: Kalyan Gokhale <4734245+KalyanGokhale@users.noreply.github.com>
Date: Wed, 20 Jun 2018 16:03:07 +0530
Subject: [PATCH 15/55] REGR: Fixes first_valid_index when DataFrame or Series
 has duplicate row index (GH21441) (#21497)

(cherry picked from commit ec2020735d72ff73e0a6a607689281aad173c702)
---
 doc/source/whatsnew/v0.23.2.txt       |  3 ++-
 pandas/core/generic.py                | 23 +++++++++++------------
 pandas/tests/frame/test_timeseries.py | 15 ++++++++++++++-
 3 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt
index f7c04ba9cfa9f..7d870fefba651 100644
--- a/doc/source/whatsnew/v0.23.2.txt
+++ b/doc/source/whatsnew/v0.23.2.txt
@@ -17,7 +17,8 @@ Fixed Regressions
 ~~~~~~~~~~~~~~~~~
 
 - Fixed regression in :meth:`to_csv` when handling file-like object incorrectly (:issue:`21471`)
--
+- Bug in both :meth:`DataFrame.first_valid_index` and :meth:`Series.first_valid_index` raised for a row index having duplicate values (:issue:`21441`)
+-
 
 .. _whatsnew_0232.performance:
 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 9e4eda1bc4dc7..b03e598dcc52c 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -8969,18 +8969,17 @@ def _find_valid_index(self, how):
             is_valid = is_valid.any(1)  # reduce axis 1
 
         if how == 'first':
-            # First valid value case
-            i = is_valid.idxmax()
-            if not is_valid[i]:
-                return None
-            return i
-
-        elif how == 'last':
-            # Last valid value case
-            i = is_valid.values[::-1].argmax()
-            if not is_valid.iat[len(self) - i - 1]:
-                return None
-            return self.index[len(self) - i - 1]
+            idxpos = is_valid.values[::].argmax()
+
+        if how == 'last':
+            idxpos = len(self) - 1 - is_valid.values[::-1].argmax()
+
+        chk_notna = is_valid.iat[idxpos]
+        idx = self.index[idxpos]
+
+        if not chk_notna:
+            return None
+        return idx
 
     @Appender(_shared_docs['valid_index'] % {'position': 'first',
                                              'klass': 'NDFrame'})
diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py
index 90fbc6e628369..fb9bd74d9876d 100644
--- a/pandas/tests/frame/test_timeseries.py
+++ b/pandas/tests/frame/test_timeseries.py
@@ -506,7 +506,15 @@ def test_asfreq_fillvalue(self):
         actual_series = ts.asfreq(freq='1S', fill_value=9.0)
         assert_series_equal(expected_series, actual_series)
 
-    def test_first_last_valid(self):
+    @pytest.mark.parametrize("data,idx,expected_first,expected_last", [
+        ({'A': [1, 2, 3]}, [1, 1, 2], 1, 2),
+        ({'A': [1, 2, 3]}, [1, 2, 2], 1, 2),
+        ({'A': [1, 2, 3, 4]}, ['d', 'd', 'd', 'd'], 'd', 'd'),
+        ({'A': [1, np.nan, 3]}, [1, 1, 2], 1, 2),
+        ({'A': [np.nan, np.nan, 3]}, [1, 1, 2], 2, 2),
+        ({'A': [1, np.nan, 3]}, [1, 2, 2], 1, 2)])
+    def test_first_last_valid(self, data, idx,
+                              expected_first, expected_last):
         N = len(self.frame.index)
         mat = randn(N)
         mat[:5] = nan
@@ -539,6 +547,11 @@ def test_first_last_valid(self):
         assert frame.first_valid_index().freq == frame.index.freq
         assert frame.last_valid_index().freq == frame.index.freq
 
+        # GH 21441
+        df = DataFrame(data, index=idx)
+        assert expected_first == df.first_valid_index()
+        assert expected_last == df.last_valid_index()
+
     def test_first_subset(self):
         ts = tm.makeTimeDataFrame(freq='12h')
         result = ts.first('10d')

From 172c5159ba7c1a1c0a398af4ee2ac77f00c1ef85 Mon Sep 17 00:00:00 2001
From: Michael Odintsov
Date: Thu, 21 Jun 2018 05:54:23 +0300
Subject: [PATCH 16/55] BUG: Fix group index calculation to prevent hitting
 maximum recursion depth (#21541)

(cherry picked from commit f91a7049d1730aa1924584a07a1265d9f57a2f35)
---
 doc/source/whatsnew/v0.23.2.txt      |  1 +
 pandas/core/sorting.py               | 29 ++++++++++++++++------------
 pandas/tests/frame/test_analytics.py | 17 ++++++++++++++++
 3 files changed, 35 insertions(+), 12 deletions(-)

diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt
index 7d870fefba651..a1b71ba5cbc43 100644
--- a/doc/source/whatsnew/v0.23.2.txt
+++ b/doc/source/whatsnew/v0.23.2.txt
@@ -58,6 +58,7 @@ Bug Fixes
 
 - Bug in :meth:`Index.get_indexer_non_unique` with categorical key (:issue:`21448`)
 - Bug in comparison operations for :class:`MultiIndex` where error was raised on equality / inequality comparison involving a MultiIndex with ``nlevels == 1`` (:issue:`21149`)
+- Bug in :func:`DataFrame.duplicated` with a large number of columns causing a 'maximum recursion depth exceeded' (:issue:`21524`).
 -
 
 **I/O**
diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py
index e550976d1deeb..212f44e55c489 100644
--- a/pandas/core/sorting.py
+++ b/pandas/core/sorting.py
@@ -52,7 +52,21 @@ def _int64_cut_off(shape):
                 return i
         return len(shape)
 
-    def loop(labels, shape):
+    def maybe_lift(lab, size):
+        # promote nan values (assigned -1 label in lab array)
+        # so that all output values are non-negative
+        return (lab + 1, size + 1) if (lab == -1).any() else (lab, size)
+
+    labels = map(_ensure_int64, labels)
+    if not xnull:
+        labels, shape = map(list, zip(*map(maybe_lift, labels, shape)))
+
+    labels = list(labels)
+    shape = list(shape)
+
+    # Iteratively process all the labels in chunks sized so less
+    # than _INT64_MAX unique int ids will be required for each chunk
+    while True:
         # how many levels can be done without overflow:
         nlev = _int64_cut_off(shape)
 
@@ -74,7 +88,7 @@ def loop(labels, shape):
             out[mask] = -1
 
         if nlev == len(shape):  # all levels done!
-            return out
+            break
 
         # compress what has been done so far in order to avoid overflow
         # to retain lexical ranks, obs_ids should be sorted
@@ -83,16 +97,7 @@ def loop(labels, shape):
         labels = [comp_ids] + labels[nlev:]
         shape = [len(obs_ids)] + shape[nlev:]
 
-        return loop(labels, shape)
-
-    def maybe_lift(lab, size):  # pormote nan values
-        return (lab + 1, size + 1) if (lab == -1).any() else (lab, size)
-
-    labels = map(_ensure_int64, labels)
-    if not xnull:
-        labels, shape = map(list, zip(*map(maybe_lift, labels, shape)))
-
-    return loop(list(labels), list(shape))
+    return out
 
 
 def get_compressed_ids(labels, sizes):
diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py
index 90d7c46f7554f..4197339ff6e03 100644
--- a/pandas/tests/frame/test_analytics.py
+++ b/pandas/tests/frame/test_analytics.py
@@ -1507,6 +1507,23 @@ def test_duplicated_with_misspelled_column_name(self, subset):
         with pytest.raises(KeyError):
             df.drop_duplicates(subset)
 
+    @pytest.mark.slow
+    def test_duplicated_do_not_fail_on_wide_dataframes(self):
+        # gh-21524
+        # Given the wide dataframe with a lot of columns
+        # with different (important!) values
+        data = {'col_{0:02d}'.format(i): np.random.randint(0, 1000, 30000)
+                for i in range(100)}
+        df = pd.DataFrame(data).T
+        result = df.duplicated()
+
+        # Then duplicates produce the bool pd.Series as a result
+        # and don't fail during calculation.
+        # Actual values doesn't matter here, though usually
+        # it's all False in this case
+        assert isinstance(result, pd.Series)
+        assert result.dtype == np.bool
+
     def test_drop_duplicates_with_duplicate_column_names(self):
         # GH17836
         df = DataFrame([

From a2199d2c01241d325bbff9474a94c47a8a7a4b82 Mon Sep 17 00:00:00 2001
From: alimcmaster1
Date: Thu, 21 Jun 2018 09:13:01 +0100
Subject: [PATCH 17/55] BUG: Fix passing empty label to df drop (#21515)

Closes #21494

(cherry picked from commit f4fba9e90f6a7e27af984acc77403139ef600d8f)
---
 doc/source/whatsnew/v0.23.2.txt               |   1 +
 pandas/core/generic.py                        |  21 ++--
 pandas/core/indexes/base.py                   |   4 +-
 pandas/core/indexes/multi.py                  |   1 -
 .../tests/frame/test_axis_select_reindex.py   |  15 +++
 .../tests/series/indexing/test_alter_index.py | 106 ++++++++++++------
 6 files changed, 98 insertions(+), 50 deletions(-)

diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt
index a1b71ba5cbc43..20d427335a47f 100644
--- a/doc/source/whatsnew/v0.23.2.txt
+++ b/doc/source/whatsnew/v0.23.2.txt
@@ -58,6 +58,7 @@ Bug Fixes
 
 - Bug in :meth:`Index.get_indexer_non_unique` with categorical key (:issue:`21448`)
 - Bug in comparison operations for :class:`MultiIndex` where error was raised on equality / inequality comparison involving a MultiIndex with ``nlevels == 1`` (:issue:`21149`)
+- Bug in :meth:`DataFrame.drop` behaviour is not consistent for unique and non-unique indexes (:issue:`21494`)
 - Bug in :func:`DataFrame.duplicated` with a large number of columns causing a 'maximum recursion depth exceeded' (:issue:`21524`).
 -
 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index b03e598dcc52c..612ee7cb42021 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -3129,7 +3129,7 @@ def _drop_axis(self, labels, axis, level=None, errors='raise'):
         """
         axis = self._get_axis_number(axis)
         axis_name = self._get_axis_name(axis)
-        axis, axis_ = self._get_axis(axis), axis
+        axis = self._get_axis(axis)
 
         if axis.is_unique:
             if level is not None:
@@ -3138,24 +3138,25 @@ def _drop_axis(self, labels, axis, level=None, errors='raise'):
                 new_axis = axis.drop(labels, level=level, errors=errors)
             else:
                 new_axis = axis.drop(labels, errors=errors)
-            dropped = self.reindex(**{axis_name: new_axis})
-            try:
-                dropped.axes[axis_].set_names(axis.names, inplace=True)
-            except AttributeError:
-                pass
-            result = dropped
+            result = self.reindex(**{axis_name: new_axis})
 
+        # Case for non-unique axis
         else:
             labels = _ensure_object(com._index_labels_to_array(labels))
             if level is not None:
                 if not isinstance(axis, MultiIndex):
                     raise AssertionError('axis must be a MultiIndex')
                 indexer = ~axis.get_level_values(level).isin(labels)
+
+                # GH 18561 MultiIndex.drop should raise if label is absent
+                if errors == 'raise' and indexer.all():
+                    raise KeyError('{} not found in axis'.format(labels))
             else:
                 indexer = ~axis.isin(labels)
-
-            if errors == 'raise' and indexer.all():
-                raise KeyError('{} not found in axis'.format(labels))
+                # Check if label doesn't exist along axis
+                labels_missing = (axis.get_indexer_for(labels) == -1).any()
+                if errors == 'raise' and labels_missing:
+                    raise KeyError('{} not found in axis'.format(labels))
 
             slicer = [slice(None)] * self.ndim
             slicer[self._get_axis_number(axis_name)] = indexer
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 4dacec6a93c68..59527afe6c1f7 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -4392,7 +4392,7 @@ def drop(self, labels, errors='raise'):
 
         Raises
        ------
        KeyError
            If none of
the labels are found in the selected axis + If not all of the labels are found in the selected axis """ arr_dtype = 'object' if self.dtype == 'object' else None labels = com._index_labels_to_array(labels, dtype=arr_dtype) @@ -4401,7 +4401,7 @@ def drop(self, labels, errors='raise'): if mask.any(): if errors != 'ignore': raise KeyError( - 'labels %s not contained in axis' % labels[mask]) + '{} not found in axis'.format(labels[mask])) indexer = indexer[~mask] return self.delete(indexer) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index c8332d762f7ef..80bf73cfe7dd3 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1707,7 +1707,6 @@ def drop(self, labels, level=None, errors='raise'): if errors != 'ignore': raise ValueError('labels %s not contained in axis' % labels[mask]) - indexer = indexer[~mask] except Exception: pass diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index 28e82f7585850..0e0d6598f5101 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -1151,3 +1151,18 @@ def test_raise_on_drop_duplicate_index(self, actual): expected_no_err = actual.T.drop('c', axis=1, level=level, errors='ignore') assert_frame_equal(expected_no_err.T, actual) + + @pytest.mark.parametrize('index', [[1, 2, 3], [1, 1, 2]]) + @pytest.mark.parametrize('drop_labels', [[], [1], [2]]) + def test_drop_empty_list(self, index, drop_labels): + # GH 21494 + expected_index = [i for i in index if i not in drop_labels] + frame = pd.DataFrame(index=index).drop(drop_labels) + tm.assert_frame_equal(frame, pd.DataFrame(index=expected_index)) + + @pytest.mark.parametrize('index', [[1, 2, 3], [1, 2, 2]]) + @pytest.mark.parametrize('drop_labels', [[1, 4], [4, 5]]) + def test_drop_non_empty_list(self, index, drop_labels): + # GH 21494 + with tm.assert_raises_regex(KeyError, 'not found in axis'): + pd.DataFrame(index=index).drop(drop_labels) diff --git a/pandas/tests/series/indexing/test_alter_index.py b/pandas/tests/series/indexing/test_alter_index.py index 999ed5f26daee..2fdf198596ce2 100644 --- a/pandas/tests/series/indexing/test_alter_index.py +++ b/pandas/tests/series/indexing/test_alter_index.py @@ -463,54 +463,86 @@ def test_rename(): assert result.name == expected.name -def test_drop(): - # unique - s = Series([1, 2], index=['one', 'two']) - expected = Series([1], index=['one']) - result = s.drop(['two']) - assert_series_equal(result, expected) - result = s.drop('two', axis='rows') - assert_series_equal(result, expected) - - # non-unique - # GH 5248 - s = Series([1, 1, 2], index=['one', 'two', 'one']) - expected = Series([1, 2], index=['one', 'one']) - result = s.drop(['two'], axis=0) - assert_series_equal(result, expected) - result = s.drop('two') - assert_series_equal(result, expected) - - expected = Series([1], index=['two']) - result = s.drop(['one']) - assert_series_equal(result, expected) - result = s.drop('one') - assert_series_equal(result, expected) +@pytest.mark.parametrize( + 'data, index, drop_labels,' + ' axis, expected_data, expected_index', + [ + # Unique Index + ([1, 2], ['one', 'two'], ['two'], + 0, [1], ['one']), + ([1, 2], ['one', 'two'], ['two'], + 'rows', [1], ['one']), + ([1, 1, 2], ['one', 'two', 'one'], ['two'], + 0, [1, 2], ['one', 'one']), + + # GH 5248 Non-Unique Index + ([1, 1, 2], ['one', 'two', 'one'], 'two', + 0, [1, 2], ['one', 'one']), + ([1, 1, 2], ['one', 'two', 'one'], ['one'], + 0, [1], ['two']), + ([1, 1, 
2], ['one', 'two', 'one'], 'one', + 0, [1], ['two'])]) +def test_drop_unique_and_non_unique_index(data, index, axis, drop_labels, + expected_data, expected_index): + + s = Series(data=data, index=index) + result = s.drop(drop_labels, axis=axis) + expected = Series(data=expected_data, index=expected_index) + tm.assert_series_equal(result, expected) - # single string/tuple-like - s = Series(range(3), index=list('abc')) - pytest.raises(KeyError, s.drop, 'bc') - pytest.raises(KeyError, s.drop, ('a',)) +@pytest.mark.parametrize( + 'data, index, drop_labels,' + ' axis, error_type, error_desc', + [ + # single string/tuple-like + (range(3), list('abc'), 'bc', + 0, KeyError, 'not found in axis'), + + # bad axis + (range(3), list('abc'), ('a',), + 0, KeyError, 'not found in axis'), + (range(3), list('abc'), 'one', + 'columns', ValueError, 'No axis named columns')]) +def test_drop_exception_raised(data, index, drop_labels, + axis, error_type, error_desc): + + with tm.assert_raises_regex(error_type, error_desc): + Series(data, index=index).drop(drop_labels, axis=axis) + + +def test_drop_with_ignore_errors(): # errors='ignore' s = Series(range(3), index=list('abc')) result = s.drop('bc', errors='ignore') - assert_series_equal(result, s) + tm.assert_series_equal(result, s) result = s.drop(['a', 'd'], errors='ignore') expected = s.iloc[1:] - assert_series_equal(result, expected) - - # bad axis - pytest.raises(ValueError, s.drop, 'one', axis='columns') + tm.assert_series_equal(result, expected) # GH 8522 s = Series([2, 3], index=[True, False]) assert s.index.is_object() result = s.drop(True) expected = Series([3], index=[False]) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) + - # GH 16877 - s = Series([2, 3], index=[0, 1]) - with tm.assert_raises_regex(KeyError, 'not contained in axis'): - s.drop([False, True]) +@pytest.mark.parametrize('index', [[1, 2, 3], [1, 1, 3]]) +@pytest.mark.parametrize('drop_labels', [[], [1], [3]]) +def test_drop_empty_list(index, drop_labels): + # GH 21494 + expected_index = [i for i in index if i not in drop_labels] + series = pd.Series(index=index).drop(drop_labels) + tm.assert_series_equal(series, pd.Series(index=expected_index)) + + +@pytest.mark.parametrize('data, index, drop_labels', [ + (None, [1, 2, 3], [1, 4]), + (None, [1, 2, 2], [1, 4]), + ([2, 3], [0, 1], [False, True]) +]) +def test_drop_non_empty_list(data, index, drop_labels): + # GH 21494 and GH 16877 + with tm.assert_raises_regex(KeyError, 'not found in axis'): + pd.Series(data=data, index=index).drop(drop_labels) From 4b1a68776aa20bb2dc081bb77093adb6c47957f2 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 21 Jun 2018 03:18:53 -0700 Subject: [PATCH 18/55] fix hashing string-casting error (#21187) (cherry picked from commit e24da6c9f92d2b04ffb39a7fe0db85015af7ff3f) --- doc/source/whatsnew/v0.23.2.txt | 1 + pandas/_libs/hashing.pyx | 7 ++----- pandas/tests/series/test_repr.py | 30 ++++++++++++++++++++++++++++++ pandas/util/testing.py | 22 ++++++++++++++++++++++ 4 files changed, 55 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index 20d427335a47f..60376f416aeb7 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -80,6 +80,7 @@ Bug Fixes **Categorical** +- Bug in rendering :class:`Series` with ``Categorical`` dtype in rare conditions under Python 2.7 (:issue:`21002`) - **Timezones** diff --git a/pandas/_libs/hashing.pyx b/pandas/_libs/hashing.pyx index 
c6f182ac5003f..4489847518a1d 100644 --- a/pandas/_libs/hashing.pyx +++ b/pandas/_libs/hashing.pyx @@ -8,8 +8,7 @@ import numpy as np from numpy cimport ndarray, uint8_t, uint32_t, uint64_t from util cimport _checknull -from cpython cimport (PyString_Check, - PyBytes_Check, +from cpython cimport (PyBytes_Check, PyUnicode_Check) from libc.stdlib cimport malloc, free @@ -62,9 +61,7 @@ def hash_object_array(ndarray[object] arr, object key, object encoding='utf8'): cdef list datas = [] for i in range(n): val = arr[i] - if PyString_Check(val): - data = val.encode(encoding) - elif PyBytes_Check(val): + if PyBytes_Check(val): data = val elif PyUnicode_Check(val): data = val.encode(encoding) diff --git a/pandas/tests/series/test_repr.py b/pandas/tests/series/test_repr.py index 97236f028b1c4..730c2b7865f1f 100644 --- a/pandas/tests/series/test_repr.py +++ b/pandas/tests/series/test_repr.py @@ -11,6 +11,7 @@ from pandas import (Index, Series, DataFrame, date_range, option_context, Categorical, period_range, timedelta_range) from pandas.core.index import MultiIndex +from pandas.core.base import StringMixin from pandas.compat import lrange, range, u from pandas import compat @@ -202,6 +203,35 @@ def test_latex_repr(self): class TestCategoricalRepr(object): + def test_categorical_repr_unicode(self): + # GH#21002 if len(index) > 60, sys.getdefaultencoding()=='ascii', + # and we are working in PY2, then rendering a Categorical could raise + # UnicodeDecodeError by trying to decode when it shouldn't + + class County(StringMixin): + name = u'San Sebastián' + state = u'PR' + + def __unicode__(self): + return self.name + u', ' + self.state + + cat = pd.Categorical([County() for n in range(61)]) + idx = pd.Index(cat) + ser = idx.to_series() + + if compat.PY3: + # no reloading of sys, just check that the default (utf8) works + # as expected + repr(ser) + str(ser) + + else: + # set sys.defaultencoding to ascii, then change it back after + # the test + with tm.set_defaultencoding('ascii'): + repr(ser) + str(ser) + def test_categorical_repr(self): a = Series(Categorical([1, 2, 3, 4])) exp = u("0 1\n1 2\n2 3\n3 4\n" + diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 233eba6490937..6384eca9849f6 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -553,6 +553,28 @@ def _valid_locales(locales, normalize): # Stdout / stderr decorators +@contextmanager +def set_defaultencoding(encoding): + """ + Set default encoding (as given by sys.getdefaultencoding()) to the given + encoding; restore on exit. 
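+
+    Only usable under Python 2; on Python 3 this raises ``ValueError``.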
+ + Parameters + ---------- + encoding : str + """ + if not PY2: + raise ValueError("set_defaultencoding context is only available " + "in Python 2.") + orig = sys.getdefaultencoding() + reload(sys) # noqa:F821 + sys.setdefaultencoding(encoding) + try: + yield + finally: + sys.setdefaultencoding(orig) + + def capture_stdout(f): """ Decorator to capture stdout in a buffer so that it can be checked From 2d2f6aa9e368e3d97d8a8d24a802357e4ac3a919 Mon Sep 17 00:00:00 2001 From: Jacopo Rota Date: Sat, 23 Jun 2018 01:04:38 +0200 Subject: [PATCH 19/55] add test case when to_csv argument is sys.stdout (#21572) (cherry picked from commit 66fea91e915ca5e3f096055f3ad0f07335483e3f) --- pandas/tests/io/formats/test_to_csv.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index dfa3751bff57a..36c4ae547ad4e 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -285,3 +285,18 @@ def test_to_csv_string_array_utf8(self): df.to_csv(path, encoding='utf-8') with open(path, 'r') as f: assert f.read() == expected_utf8 + + @tm.capture_stdout + def test_to_csv_stdout_file(self): + # GH 21561 + df = pd.DataFrame([['foo', 'bar'], ['baz', 'qux']], + columns=['name_1', 'name_2']) + expected_ascii = '''\ +,name_1,name_2 +0,foo,bar +1,baz,qux +''' + df.to_csv(sys.stdout, encoding='ascii') + output = sys.stdout.getvalue() + assert output == expected_ascii + assert not sys.stdout.closed From cf0a55f86eb73782d0d76cc9208ca56d374c9a5e Mon Sep 17 00:00:00 2001 From: Vu Le Date: Sat, 23 Jun 2018 06:07:21 +0700 Subject: [PATCH 20/55] BUG: Fix json_normalize throwing TypeError (#21536) (#21540) (cherry picked from commit 5fdaa9717f7550c5293d421205bfa19011278396) --- doc/source/whatsnew/v0.23.2.txt | 2 +- pandas/io/json/normalize.py | 8 +++++++- pandas/tests/io/json/test_normalize.py | 6 ++++++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index 60376f416aeb7..53ca4c0d1c144 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -65,7 +65,7 @@ Bug Fixes **I/O** - Bug in :func:`read_csv` that caused it to incorrectly raise an error when ``nrows=0``, ``low_memory=True``, and ``index_col`` was not ``None`` (:issue:`21141`) -- +- Bug in :func:`json_normalize` when formatting the ``record_prefix`` with integer columns (:issue:`21536`) - **Plotting** diff --git a/pandas/io/json/normalize.py b/pandas/io/json/normalize.py index b845a43b9ca9e..2004a24c2ec5a 100644 --- a/pandas/io/json/normalize.py +++ b/pandas/io/json/normalize.py @@ -170,6 +170,11 @@ def json_normalize(data, record_path=None, meta=None, 3 Summit 1234 John Kasich Ohio OH 4 Cuyahoga 1337 John Kasich Ohio OH + >>> data = {'A': [1, 2]} + >>> json_normalize(data, 'A', record_prefix='Prefix.') + Prefix.0 + 0 1 + 1 2 """ def _pull_field(js, spec): result = js @@ -259,7 +264,8 @@ def _recursive_extract(data, path, seen_meta, level=0): result = DataFrame(records) if record_prefix is not None: - result.rename(columns=lambda x: record_prefix + x, inplace=True) + result = result.rename( + columns=lambda x: "{p}{c}".format(p=record_prefix, c=x)) # Data types, a problem for k, v in compat.iteritems(meta_vals): diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index 395c2c90767d3..200a853c48900 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py 
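As a hedged sketch of the user-facing behaviour the hunk below asserts (the
data and prefix are illustrative):

    from pandas.io.json import json_normalize

    # record_prefix is now applied with str.format, so integer column
    # labels no longer raise TypeError when a prefix is supplied
    result = json_normalize({'A': [1, 2]}, 'A', record_prefix='Prefix.')
    # -> a single column named 'Prefix.0' holding the values 1 and 2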
@@ -123,6 +123,12 @@ def test_simple_normalize_with_separator(self, deep_nested): 'country', 'states_name']).sort_values() assert result.columns.sort_values().equals(expected) + def test_value_array_record_prefix(self): + # GH 21536 + result = json_normalize({'A': [1, 2]}, 'A', record_prefix='Prefix.') + expected = DataFrame([[1], [2]], columns=['Prefix.0']) + tm.assert_frame_equal(result, expected) + def test_more_deeply_nested(self, deep_nested): result = json_normalize(deep_nested, ['states', 'cities'], From 176695fde32e872478d303ab21965bd49416aae4 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 2 Jul 2018 13:48:59 +0200 Subject: [PATCH 21/55] Remove incorrectly added TestMode class The tests were incorrectly added from https://github.com/pandas-dev/pandas/commit/f1631bec96dd9a1dc4890677b9c5475d0677e102#diff-dc347bc3d0448ea297bed67dc7ff3437 when fixing merge conflicts during cherry-picking --- pandas/tests/series/test_analytics.py | 174 -------------------------- 1 file changed, 174 deletions(-) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 7a78b562ac1fa..1e6ea96a5de51 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1866,180 +1866,6 @@ def s_main_dtypes(): return df -class TestMode(object): - - @pytest.mark.parametrize('dropna, expected', [ - (True, Series([], dtype=np.float64)), - (False, Series([], dtype=np.float64)) - ]) - def test_mode_empty(self, dropna, expected): - s = Series([], dtype=np.float64) - result = s.mode(dropna) - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize('dropna, data, expected', [ - (True, [1, 1, 1, 2], [1]), - (True, [1, 1, 1, 2, 3, 3, 3], [1, 3]), - (False, [1, 1, 1, 2], [1]), - (False, [1, 1, 1, 2, 3, 3, 3], [1, 3]), - ]) - @pytest.mark.parametrize( - 'dt', - list(np.typecodes['AllInteger'] + np.typecodes['Float']) - ) - def test_mode_numerical(self, dropna, data, expected, dt): - s = Series(data, dtype=dt) - result = s.mode(dropna) - expected = Series(expected, dtype=dt) - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize('dropna, expected', [ - (True, [1.0]), - (False, [1, np.nan]), - ]) - def test_mode_numerical_nan(self, dropna, expected): - s = Series([1, 1, 2, np.nan, np.nan]) - result = s.mode(dropna) - expected = Series(expected) - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize('dropna, expected1, expected2, expected3', [ - (True, ['b'], ['bar'], ['nan']), - (False, ['b'], [np.nan], ['nan']) - ]) - def test_mode_str_obj(self, dropna, expected1, expected2, expected3): - # Test string and object types. 
- data = ['a'] * 2 + ['b'] * 3 - - s = Series(data, dtype='c') - result = s.mode(dropna) - expected1 = Series(expected1, dtype='c') - tm.assert_series_equal(result, expected1) - - data = ['foo', 'bar', 'bar', np.nan, np.nan, np.nan] - - s = Series(data, dtype=object) - result = s.mode(dropna) - expected2 = Series(expected2, dtype=object) - tm.assert_series_equal(result, expected2) - - data = ['foo', 'bar', 'bar', np.nan, np.nan, np.nan] - - s = Series(data, dtype=object).astype(str) - result = s.mode(dropna) - expected3 = Series(expected3, dtype=str) - tm.assert_series_equal(result, expected3) - - @pytest.mark.parametrize('dropna, expected1, expected2', [ - (True, ['foo'], ['foo']), - (False, ['foo'], [np.nan]) - ]) - def test_mode_mixeddtype(self, dropna, expected1, expected2): - s = Series([1, 'foo', 'foo']) - result = s.mode(dropna) - expected = Series(expected1) - tm.assert_series_equal(result, expected) - - s = Series([1, 'foo', 'foo', np.nan, np.nan, np.nan]) - result = s.mode(dropna) - expected = Series(expected2, dtype=object) - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize('dropna, expected1, expected2', [ - (True, ['1900-05-03', '2011-01-03', '2013-01-02'], - ['2011-01-03', '2013-01-02']), - (False, [np.nan], [np.nan, '2011-01-03', '2013-01-02']), - ]) - def test_mode_datetime(self, dropna, expected1, expected2): - s = Series(['2011-01-03', '2013-01-02', - '1900-05-03', 'nan', 'nan'], dtype='M8[ns]') - result = s.mode(dropna) - expected1 = Series(expected1, dtype='M8[ns]') - tm.assert_series_equal(result, expected1) - - s = Series(['2011-01-03', '2013-01-02', '1900-05-03', - '2011-01-03', '2013-01-02', 'nan', 'nan'], - dtype='M8[ns]') - result = s.mode(dropna) - expected2 = Series(expected2, dtype='M8[ns]') - tm.assert_series_equal(result, expected2) - - @pytest.mark.parametrize('dropna, expected1, expected2', [ - (True, ['-1 days', '0 days', '1 days'], ['2 min', '1 day']), - (False, [np.nan], [np.nan, '2 min', '1 day']), - ]) - def test_mode_timedelta(self, dropna, expected1, expected2): - # gh-5986: Test timedelta types. 
- - s = Series(['1 days', '-1 days', '0 days', 'nan', 'nan'], - dtype='timedelta64[ns]') - result = s.mode(dropna) - expected1 = Series(expected1, dtype='timedelta64[ns]') - tm.assert_series_equal(result, expected1) - - s = Series(['1 day', '1 day', '-1 day', '-1 day 2 min', - '2 min', '2 min', 'nan', 'nan'], - dtype='timedelta64[ns]') - result = s.mode(dropna) - expected2 = Series(expected2, dtype='timedelta64[ns]') - tm.assert_series_equal(result, expected2) - - @pytest.mark.parametrize('dropna, expected1, expected2, expected3', [ - (True, Categorical([1, 2], categories=[1, 2]), - Categorical(['a'], categories=[1, 'a']), - Categorical([3, 1], categories=[3, 2, 1], ordered=True)), - (False, Categorical([np.nan], categories=[1, 2]), - Categorical([np.nan, 'a'], categories=[1, 'a']), - Categorical([np.nan, 3, 1], categories=[3, 2, 1], ordered=True)), - ]) - def test_mode_category(self, dropna, expected1, expected2, expected3): - s = Series(Categorical([1, 2, np.nan, np.nan])) - result = s.mode(dropna) - expected1 = Series(expected1, dtype='category') - tm.assert_series_equal(result, expected1) - - s = Series(Categorical([1, 'a', 'a', np.nan, np.nan])) - result = s.mode(dropna) - expected2 = Series(expected2, dtype='category') - tm.assert_series_equal(result, expected2) - - s = Series(Categorical([1, 1, 2, 3, 3, np.nan, np.nan], - categories=[3, 2, 1], ordered=True)) - result = s.mode(dropna) - expected3 = Series(expected3, dtype='category') - tm.assert_series_equal(result, expected3) - - @pytest.mark.parametrize('dropna, expected1, expected2', [ - (True, [2**63], [1, 2**63]), - (False, [2**63], [1, 2**63]) - ]) - def test_mode_intoverflow(self, dropna, expected1, expected2): - # Test for uint64 overflow. - s = Series([1, 2**63, 2**63], dtype=np.uint64) - result = s.mode(dropna) - expected1 = Series(expected1, dtype=np.uint64) - tm.assert_series_equal(result, expected1) - - s = Series([1, 2**63], dtype=np.uint64) - result = s.mode(dropna) - expected2 = Series(expected2, dtype=np.uint64) - tm.assert_series_equal(result, expected2) - - @pytest.mark.skipif(not compat.PY3, reason="only PY3") - def test_mode_sortwarning(self): - # Check for the warning that is raised when the mode - # results cannot be sorted - - expected = Series(['foo', np.nan]) - s = Series([1, 'foo', 'foo', np.nan, np.nan]) - - with tm.assert_produces_warning(UserWarning, check_stacklevel=False): - result = s.mode(dropna=False) - result = result.sort_values().reset_index(drop=True) - - tm.assert_series_equal(result, expected) - - def assert_check_nselect_boundary(vals, dtype, method): # helper function for 'test_boundary_{dtype}' tests s = Series(vals, dtype=dtype) From 8c7996d2211a95cf67ff2d465dd3c1517b90a310 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 2 Jul 2018 17:25:33 +0200 Subject: [PATCH 22/55] DOC: fix spaces in 0.23.1 whatsnew file Take from https://github.com/pandas-dev/pandas/commit/e92b78603e1404e49d6bcb19873d2d24225a8e50 (could not be cherry-picked in its totality) --- doc/source/whatsnew/v0.23.1.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.txt index db25bcf8113f5..a52ba22cf36d2 100644 --- a/doc/source/whatsnew/v0.23.1.txt +++ b/doc/source/whatsnew/v0.23.1.txt @@ -97,8 +97,8 @@ Bug Fixes **Data-type specific** -- Bug in :meth:`Series.str.replace()` where the method throws `TypeError` on Python 3.5.2 (:issue: `21078`) -- Bug in :class:`Timedelta`: where passing a float with a unit would prematurely 
round the float precision (:issue: `14156`) +- Bug in :meth:`Series.str.replace()` where the method throws `TypeError` on Python 3.5.2 (:issue:`21078`) +- Bug in :class:`Timedelta`: where passing a float with a unit would prematurely round the float precision (:issue:`14156`) - Bug in :func:`pandas.testing.assert_index_equal` which raised ``AssertionError`` incorrectly, when comparing two :class:`CategoricalIndex` objects with param ``check_categorical=False`` (:issue:`19776`) **Sparse** @@ -110,12 +110,12 @@ Bug Fixes - Bug in :meth:`Series.reset_index` where appropriate error was not raised with an invalid level name (:issue:`20925`) - Bug in :func:`interval_range` when ``start``/``periods`` or ``end``/``periods`` are specified with float ``start`` or ``end`` (:issue:`21161`) - Bug in :meth:`MultiIndex.set_names` where error raised for a ``MultiIndex`` with ``nlevels == 1`` (:issue:`21149`) -- Bug in :class:`IntervalIndex` constructors where creating an ``IntervalIndex`` from categorical data was not fully supported (:issue:`21243`, issue:`21253`) +- Bug in :class:`IntervalIndex` constructors where creating an ``IntervalIndex`` from categorical data was not fully supported (:issue:`21243`, :issue:`21253`) - Bug in :meth:`MultiIndex.sort_index` which was not guaranteed to sort correctly with ``level=1``; this was also causing data misalignment in particular :meth:`DataFrame.stack` operations (:issue:`20994`, :issue:`20945`, :issue:`21052`) **Plotting** -- New keywords (sharex, sharey) to turn on/off sharing of x/y-axis by subplots generated with pandas.DataFrame().groupby().boxplot() (:issue: `20968`) +- New keywords (sharex, sharey) to turn on/off sharing of x/y-axis by subplots generated with pandas.DataFrame().groupby().boxplot() (:issue:`20968`) **I/O** From d0f664a20d581919b6d5d6efef9704e540b013b8 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 25 Jun 2018 09:57:43 -0500 Subject: [PATCH 23/55] CI: Test against Python 3.7 (#21604) (cherry picked from commit 7829ad3290dc6894d24c1c853ffc4dabef50294a) --- .travis.yml | 5 +++++ ci/travis-37.yaml | 14 ++++++++++++++ doc/source/install.rst | 2 +- doc/source/whatsnew/v0.23.2.txt | 6 ++++++ pandas/compat/__init__.py | 9 +++++---- pandas/tests/tseries/offsets/test_offsets.py | 10 ++++++++-- setup.py | 1 + 7 files changed, 40 insertions(+), 7 deletions(-) create mode 100644 ci/travis-37.yaml diff --git a/.travis.yml b/.travis.yml index 4e25380a7d941..2d2a0bc019c80 100644 --- a/.travis.yml +++ b/.travis.yml @@ -35,6 +35,11 @@ matrix: language: generic env: - JOB="3.5, OSX" ENV_FILE="ci/travis-35-osx.yaml" TEST_ARGS="--skip-slow --skip-network" + + - dist: trusty + env: + - JOB="3.7" ENV_FILE="ci/travis-37.yaml" TEST_ARGS="--skip-slow --skip-network" + - dist: trusty env: - JOB="2.7, locale, slow, old NumPy" ENV_FILE="ci/travis-27-locale.yaml" LOCALE_OVERRIDE="zh_CN.UTF-8" SLOW=true diff --git a/ci/travis-37.yaml b/ci/travis-37.yaml new file mode 100644 index 0000000000000..8b255c9e6ec72 --- /dev/null +++ b/ci/travis-37.yaml @@ -0,0 +1,14 @@ +name: pandas +channels: + - defaults + - conda-forge + - c3i_test +dependencies: + - python=3.7 + - cython + - numpy + - python-dateutil + - nomkl + - pytz + - pytest + - pytest-xdist diff --git a/doc/source/install.rst b/doc/source/install.rst index 6054be112f52c..846170f9f0fa5 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -43,7 +43,7 @@ For more information, see the `Python 3 statement`_ and the `Porting to Python 3 Python version support ---------------------- -Officially 
Python 2.7, 3.5, and 3.6.
+Officially Python 2.7, 3.5, 3.6, and 3.7.
 
 Installing pandas
 -----------------
diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt
index 53ca4c0d1c144..5d196c4fe8d15 100644
--- a/doc/source/whatsnew/v0.23.2.txt
+++ b/doc/source/whatsnew/v0.23.2.txt
@@ -6,6 +6,12 @@ v0.23.2
 This is a minor bug-fix release in the 0.23.x series and includes some small regression fixes
 and bug fixes. We recommend that all users upgrade to this version.
 
+.. note::
+
+   Pandas 0.23.2 is the first pandas release that's compatible with
+   Python 3.7 (:issue:`20552`)
+
+
 .. contents:: What's new in v0.23.2
:local: :backlinks: none +.. _whatsnew_0232.enhancements: + +Logical Reductions over Entire DataFrame +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:meth:`DataFrame.all` and :meth:`DataFrame.any` now accept ``axis=None`` to reduce over all axes to a scalar (:issue:`19976`) + +.. ipython:: python + + df = pd.DataFrame({"A": [1, 2], "B": [True, False]}) + df.all(axis=None) + + +This also provides compatibility with NumPy 1.15, which now dispatches to ``DataFrame.all``. +With NumPy 1.15 and pandas 0.23.1 or earlier, :func:`numpy.all` will no longer reduce over every axis: + +.. code-block:: python + + >>> # NumPy 1.15, pandas 0.23.1 + >>> np.any(pd.DataFrame({"A": [False], "B": [False]})) + A False + B False + dtype: bool + +With pandas 0.23.2, that will correctly return False, as it did with NumPy < 1.15. + +.. ipython:: python + + np.any(pd.DataFrame({"A": [False], "B": [False]})) + .. _whatsnew_0232.fixed_regressions: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9f6e834f0a25f..2a40dd28a6fd7 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6844,13 +6844,18 @@ def _count_level(self, level, axis=0, numeric_only=False): def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None, filter_type=None, **kwds): - axis = self._get_axis_number(axis) + if axis is None and filter_type == 'bool': + labels = None + constructor = None + else: + # TODO: Make other agg func handle axis=None properly + axis = self._get_axis_number(axis) + labels = self._get_agg_axis(axis) + constructor = self._constructor def f(x): return op(x, axis=axis, skipna=skipna, **kwds) - labels = self._get_agg_axis(axis) - # exclude timedelta/datetime unless we are uniform types if axis == 1 and self._is_mixed_type and self._is_datelike_mixed_type: numeric_only = True @@ -6859,6 +6864,13 @@ def f(x): try: values = self.values result = f(values) + + if (filter_type == 'bool' and is_object_dtype(values) and + axis is None): + # work around https://github.com/numpy/numpy/issues/10489 + # TODO: combine with hasattr(result, 'dtype') further down + # hard since we don't have `values` down there. + result = np.bool_(result) except Exception as e: # try by-column first @@ -6925,7 +6937,9 @@ def f(x): if axis == 0: result = coerce_to_dtypes(result, self.dtypes) - return Series(result, index=labels) + if constructor is not None: + result = Series(result, index=labels) + return result def nunique(self, axis=0, dropna=True): """ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 612ee7cb42021..50a5c10a6865f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8729,6 +8729,8 @@ def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None, return rs def _agg_by_level(self, name, axis=0, level=0, skipna=True, **kwargs): + if axis is None: + raise ValueError("Must specify 'axis' when aggregating by level.") grouped = self.groupby(level=level, axis=axis, sort=False) if hasattr(grouped, name) and skipna: return getattr(grouped, name)(**kwargs) @@ -9055,8 +9057,15 @@ def _doc_parms(cls): Parameters ---------- -axis : int, default 0 - Select the axis which can be 0 for indices and 1 for columns. +axis : {0 or 'index', 1 or 'columns', None}, default 0 + Indicate which axis or axes should be reduced. + + * 0 / 'index' : reduce the index, return a Series whose index is the + original column labels. + * 1 / 'columns' : reduce the columns, return a Series whose index is the + original index. + * None : reduce all axes, return a scalar. 
+ skipna : boolean, default True Exclude NA/null values. If an entire row/column is NA, the result will be NA. @@ -9078,9 +9087,9 @@ def _doc_parms(cls): %(examples)s""" _all_doc = """\ -Return whether all elements are True over series or dataframe axis. +Return whether all elements are True, potentially over an axis. -Returns True if all elements within a series or along a dataframe +Returns True if all elements within a series or along a Dataframe axis are non-zero, not-empty or not-False.""" _all_examples = """\ @@ -9093,7 +9102,7 @@ def _doc_parms(cls): >>> pd.Series([True, False]).all() False -Dataframes +DataFrames Create a dataframe from a dictionary. @@ -9110,12 +9119,17 @@ def _doc_parms(cls): col2 False dtype: bool -Adding axis=1 argument will check if row-wise values all return True. +Specify ``axis='columns'`` to check if row-wise values all return True. ->>> df.all(axis=1) +>>> df.all(axis='columns') 0 True 1 False dtype: bool + +Or ``axis=None`` for whether every value is True. + +>>> df.all(axis=None) +False """ _all_see_also = """\ @@ -9481,6 +9495,11 @@ def _doc_parms(cls): 1 False dtype: bool +Aggregating over the entire DataFrame with ``axis=None``. + +>>> df.any(axis=None) +True + `any` for an empty DataFrame is an empty Series. >>> pd.DataFrame([]).any() @@ -9651,22 +9670,17 @@ def _make_logical_function(cls, name, name1, name2, axis_descr, desc, f, @Substitution(outname=name, desc=desc, name1=name1, name2=name2, axis_descr=axis_descr, examples=examples, see_also=see_also) @Appender(_bool_doc) - def logical_func(self, axis=None, bool_only=None, skipna=None, level=None, + def logical_func(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs): nv.validate_logical_func(tuple(), kwargs, fname=name) - if skipna is None: - skipna = True - if axis is None: - axis = self._stat_axis_number if level is not None: if bool_only is not None: raise NotImplementedError("Option bool_only is not " "implemented with option level.") return self._agg_by_level(name, axis=axis, level=level, skipna=skipna) - return self._reduce(f, axis=axis, skipna=skipna, - numeric_only=bool_only, filter_type='bool', - name=name) + return self._reduce(f, name, axis=axis, skipna=skipna, + numeric_only=bool_only, filter_type='bool') return set_function_name(logical_func, name, cls) diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 16e64192fdb20..bad0dd79aaedd 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -1143,13 +1143,26 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None, raise NotImplementedError('Panel.{0} does not implement ' 'numeric_only.'.format(name)) - axis_name = self._get_axis_name(axis) - axis_number = self._get_axis_number(axis_name) + if axis is None and filter_type == 'bool': + # labels = None + # constructor = None + axis_number = None + axis_name = None + else: + # TODO: Make other agg func handle axis=None properly + axis = self._get_axis_number(axis) + # labels = self._get_agg_axis(axis) + # constructor = self._constructor + axis_name = self._get_axis_name(axis) + axis_number = self._get_axis_number(axis_name) + f = lambda x: op(x, axis=axis_number, skipna=skipna, **kwds) with np.errstate(all='ignore'): result = f(self.values) + if axis is None and filter_type == 'bool': + return np.bool_(result) axes = self._get_plane_axes(axis_name) if result.ndim == 2 and axis_name != self._info_axis_name: result = result.T diff --git a/pandas/core/series.py b/pandas/core/series.py index 6975dd8fc918e..6b005c673c7cd 100644 --- 
a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3212,7 +3212,8 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None, delegate = self._values if isinstance(delegate, np.ndarray): # Validate that 'axis' is consistent with Series's single axis. - self._get_axis_number(axis) + if axis is not None: + self._get_axis_number(axis) if numeric_only: raise NotImplementedError('Series.{0} does not implement ' 'numeric_only.'.format(name)) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 4197339ff6e03..437d3a9d24730 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -15,7 +15,7 @@ from pandas.compat import lrange, PY35 from pandas import (compat, isna, notna, DataFrame, Series, MultiIndex, date_range, Timestamp, Categorical, - _np_version_under1p12, _np_version_under1p15) + _np_version_under1p12) import pandas as pd import pandas.core.nanops as nanops import pandas.core.algorithms as algorithms @@ -1139,11 +1139,35 @@ def test_any_all(self): self._check_bool_op('any', np.any, has_skipna=True, has_bool_only=True) self._check_bool_op('all', np.all, has_skipna=True, has_bool_only=True) - df = DataFrame(randn(10, 4)) > 0 - df.any(1) - df.all(1) - df.any(1, bool_only=True) - df.all(1, bool_only=True) + def test_any_all_extra(self): + df = DataFrame({ + 'A': [True, False, False], + 'B': [True, True, False], + 'C': [True, True, True], + }, index=['a', 'b', 'c']) + result = df[['A', 'B']].any(1) + expected = Series([True, True, False], index=['a', 'b', 'c']) + tm.assert_series_equal(result, expected) + + result = df[['A', 'B']].any(1, bool_only=True) + tm.assert_series_equal(result, expected) + + result = df.all(1) + expected = Series([True, False, False], index=['a', 'b', 'c']) + tm.assert_series_equal(result, expected) + + result = df.all(1, bool_only=True) + tm.assert_series_equal(result, expected) + + # Axis is None + result = df.all(axis=None).item() + assert result is False + + result = df.any(axis=None).item() + assert result is True + + result = df[['C']].all(axis=None).item() + assert result is True # skip pathological failure cases # class CantNonzero(object): @@ -1165,6 +1189,86 @@ def test_any_all(self): # df.any(1, bool_only=True) # df.all(1, bool_only=True) + @pytest.mark.parametrize('func, data, expected', [ + (np.any, {}, False), + (np.all, {}, True), + (np.any, {'A': []}, False), + (np.all, {'A': []}, True), + (np.any, {'A': [False, False]}, False), + (np.all, {'A': [False, False]}, False), + (np.any, {'A': [True, False]}, True), + (np.all, {'A': [True, False]}, False), + (np.any, {'A': [True, True]}, True), + (np.all, {'A': [True, True]}, True), + + (np.any, {'A': [False], 'B': [False]}, False), + (np.all, {'A': [False], 'B': [False]}, False), + + (np.any, {'A': [False, False], 'B': [False, True]}, True), + (np.all, {'A': [False, False], 'B': [False, True]}, False), + + # other types + (np.all, {'A': pd.Series([0.0, 1.0], dtype='float')}, False), + (np.any, {'A': pd.Series([0.0, 1.0], dtype='float')}, True), + (np.all, {'A': pd.Series([0, 1], dtype=int)}, False), + (np.any, {'A': pd.Series([0, 1], dtype=int)}, True), + pytest.param(np.all, {'A': pd.Series([0, 1], dtype='M8[ns]')}, False, + marks=[td.skip_if_np_lt_115]), + pytest.param(np.any, {'A': pd.Series([0, 1], dtype='M8[ns]')}, True, + marks=[td.skip_if_np_lt_115]), + pytest.param(np.all, {'A': pd.Series([1, 2], dtype='M8[ns]')}, True, + marks=[td.skip_if_np_lt_115]), + pytest.param(np.any, {'A': pd.Series([1, 2], 
dtype='M8[ns]')}, True, + marks=[td.skip_if_np_lt_115]), + pytest.param(np.all, {'A': pd.Series([0, 1], dtype='m8[ns]')}, False, + marks=[td.skip_if_np_lt_115]), + pytest.param(np.any, {'A': pd.Series([0, 1], dtype='m8[ns]')}, True, + marks=[td.skip_if_np_lt_115]), + pytest.param(np.all, {'A': pd.Series([1, 2], dtype='m8[ns]')}, True, + marks=[td.skip_if_np_lt_115]), + pytest.param(np.any, {'A': pd.Series([1, 2], dtype='m8[ns]')}, True, + marks=[td.skip_if_np_lt_115]), + (np.all, {'A': pd.Series([0, 1], dtype='category')}, False), + (np.any, {'A': pd.Series([0, 1], dtype='category')}, True), + (np.all, {'A': pd.Series([1, 2], dtype='category')}, True), + (np.any, {'A': pd.Series([1, 2], dtype='category')}, True), + + # # Mix + # GH-21484 + # (np.all, {'A': pd.Series([10, 20], dtype='M8[ns]'), + # 'B': pd.Series([10, 20], dtype='m8[ns]')}, True), + ]) + def test_any_all_np_func(self, func, data, expected): + # https://github.com/pandas-dev/pandas/issues/19976 + data = DataFrame(data) + result = func(data) + assert isinstance(result, np.bool_) + assert result.item() is expected + + # method version + result = getattr(DataFrame(data), func.__name__)(axis=None) + assert isinstance(result, np.bool_) + assert result.item() is expected + + def test_any_all_object(self): + # https://github.com/pandas-dev/pandas/issues/19976 + result = np.all(DataFrame(columns=['a', 'b'])).item() + assert result is True + + result = np.any(DataFrame(columns=['a', 'b'])).item() + assert result is False + + @pytest.mark.parametrize('method', ['any', 'all']) + def test_any_all_level_axis_none_raises(self, method): + df = DataFrame( + {"A": 1}, + index=MultiIndex.from_product([['A', 'B'], ['a', 'b']], + names=['out', 'in']) + ) + xpr = "Must specify 'axis' when aggregating by level." 
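+        # aggregating by level requires a concrete axis, so axis=None must raise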
+ with tm.assert_raises_regex(ValueError, xpr): + getattr(df, method)(axis=None, level='out') + def _check_bool_op(self, name, alternative, frame=None, has_skipna=True, has_bool_only=False): if frame is None: @@ -2071,9 +2175,6 @@ def test_clip_against_list_like(self, inplace, lower, axis, res): result = original tm.assert_frame_equal(result, expected, check_exact=True) - @pytest.mark.xfail( - not _np_version_under1p15, - reason="failing under numpy-dev gh-19976") @pytest.mark.parametrize("axis", [0, 1, None]) def test_clip_against_frame(self, axis): df = DataFrame(np.random.randn(1000, 2)) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 7973b27601237..128ab0572ba55 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -2717,3 +2717,10 @@ def test_panel_index(): np.repeat([1, 2, 3], 4)], names=['time', 'panel']) tm.assert_index_equal(index, expected) + + +def test_panel_np_all(): + with catch_warnings(record=True): + wp = Panel({"A": DataFrame({'b': [1, 2]})}) + result = np.all(wp) + assert result == np.bool_(True) diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index 89d90258f58e0..27c24e3a68079 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -30,6 +30,7 @@ def test_foo(): from pandas.compat import (is_platform_windows, is_platform_32bit, PY3, import_lzma) +from pandas.compat.numpy import _np_version_under1p15 from pandas.core.computation.expressions import (_USE_NUMEXPR, _NUMEXPR_INSTALLED) @@ -160,6 +161,9 @@ def decorated_func(func): skip_if_no_mpl = pytest.mark.skipif(_skip_if_no_mpl(), reason="Missing matplotlib dependency") + +skip_if_np_lt_115 = pytest.mark.skipif(_np_version_under1p15, + reason="NumPy 1.15 or greater required") skip_if_mpl = pytest.mark.skipif(not _skip_if_no_mpl(), reason="matplotlib is present") skip_if_mpl_1_5 = pytest.mark.skipif(_skip_if_mpl_1_5(), From 01bb92127abd4a23005e780eb1e9b09cacfbb748 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 26 Jun 2018 08:26:21 -0400 Subject: [PATCH 25/55] TST: xfail flaky 3.7 test, xref #21636 (#21637) (cherry picked from commit dbd102c863adb36d07b999c2fc26403717c4bc32) --- pandas/tests/groupby/test_categorical.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index e0793b8e1bd64..0fec6a8f96a24 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -6,6 +6,7 @@ import numpy as np import pandas as pd +from pandas.compat import PY37 from pandas import (Index, MultiIndex, CategoricalIndex, DataFrame, Categorical, Series, qcut) from pandas.util.testing import assert_frame_equal, assert_series_equal @@ -205,6 +206,7 @@ def test_level_get_group(observed): assert_frame_equal(result, expected) +@pytest.mark.xfail(PY37, reason="flaky on 3.7, xref gh-21636") @pytest.mark.parametrize('ordered', [True, False]) def test_apply(ordered): # GH 10138 From 417e87372831c4c5f906a99e19227e1d5ab7d2b3 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 26 Jun 2018 10:02:17 -0500 Subject: [PATCH 26/55] PKG: Exclude data test files. 
(#19535) (cherry picked from commit 36422a88474396148bd7d5d38aa238ea844d9555) --- MANIFEST.in | 34 ++++--- ci/script_single.sh | 8 +- doc/source/whatsnew/v0.23.2.txt | 5 + pandas/conftest.py | 42 +++++++++ pandas/tests/indexes/test_multi.py | 8 +- pandas/tests/io/conftest.py | 21 ++--- pandas/tests/io/formats/test_format.py | 4 +- pandas/tests/io/json/test_compression.py | 6 +- pandas/tests/io/json/test_pandas.py | 8 +- pandas/tests/io/parser/common.py | 25 +++-- pandas/tests/io/parser/compression.py | 4 +- pandas/tests/io/parser/dtypes.py | 6 +- pandas/tests/io/parser/test_network.py | 53 +++++------ pandas/tests/io/parser/test_parsers.py | 6 +- pandas/tests/io/parser/test_textreader.py | 5 +- pandas/tests/io/sas/test_sas7bdat.py | 43 ++++----- pandas/tests/io/sas/test_xport.py | 6 +- pandas/tests/io/test_common.py | 54 +++++------ pandas/tests/io/test_excel.py | 12 +-- pandas/tests/io/test_html.py | 92 +++++++++++-------- pandas/tests/io/test_packers.py | 51 +++++----- pandas/tests/io/test_pickle.py | 38 ++++---- pandas/tests/io/test_pytables.py | 23 +++-- pandas/tests/io/test_sql.py | 63 +++++++------ pandas/tests/io/test_stata.py | 9 +- pandas/tests/plotting/common.py | 5 - pandas/tests/plotting/test_deprecated.py | 5 +- pandas/tests/plotting/test_misc.py | 16 ++-- pandas/tests/reshape/merge/test_merge_asof.py | 33 +++---- pandas/tests/reshape/test_tile.py | 6 +- pandas/tests/tseries/offsets/test_offsets.py | 16 ++-- pandas/tests/util/test_testing.py | 13 +++ pandas/util/_test_decorators.py | 1 - pandas/util/testing.py | 10 -- setup.cfg | 3 +- setup.py | 6 +- 36 files changed, 393 insertions(+), 347 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index 9773019c6e6e0..b417b8890fa24 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,27 +3,39 @@ include LICENSE include RELEASE.md include README.md include setup.py -include pyproject.toml graft doc prune doc/build +graft LICENSES + graft pandas -global-exclude *.so -global-exclude *.pyd +global-exclude *.bz2 +global-exclude *.csv +global-exclude *.dta +global-exclude *.gz +global-exclude *.h5 +global-exclude *.html +global-exclude *.json +global-exclude *.msgpack +global-exclude *.pickle +global-exclude *.png global-exclude *.pyc +global-exclude *.pyd +global-exclude *.sas7bdat +global-exclude *.so +global-exclude *.xls +global-exclude *.xlsm +global-exclude *.xlsx +global-exclude *.xpt +global-exclude *.xz +global-exclude *.zip global-exclude *~ -global-exclude \#* -global-exclude .git* global-exclude .DS_Store -global-exclude *.png +global-exclude .git* +global-exclude \#* -# include examples/data/* -# recursive-include examples *.py -# recursive-include doc/source * -# recursive-include doc/sphinxext * -# recursive-include LICENSES * include versioneer.py include pandas/_version.py include pandas/io/formats/templates/*.tpl diff --git a/ci/script_single.sh b/ci/script_single.sh index f376c920ac71b..60e2fbb33ee5d 100755 --- a/ci/script_single.sh +++ b/ci/script_single.sh @@ -25,12 +25,12 @@ if [ "$DOC" ]; then echo "We are not running pytest as this is a doc-build" elif [ "$COVERAGE" ]; then - echo pytest -s -m "single" --strict --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas - pytest -s -m "single" --strict --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas + echo pytest -s -m "single" -r xXs --strict --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas + pytest -s -m "single" -r xXs --strict --cov=pandas 
--cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas else - echo pytest -m "single" -r xX --junitxml=/tmp/single.xml --strict $TEST_ARGS pandas - pytest -m "single" -r xX --junitxml=/tmp/single.xml --strict $TEST_ARGS pandas # TODO: doctest + echo pytest -m "single" -r xXs --junitxml=/tmp/single.xml --strict $TEST_ARGS pandas + pytest -m "single" -r xXs --junitxml=/tmp/single.xml --strict $TEST_ARGS pandas # TODO: doctest fi diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index f5a520216b2be..b3da4d1c4e288 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -70,6 +70,11 @@ Documentation Changes - - +Build Changes +------------- + +- The source and binary distributions no longer include test data files, resulting in smaller download sizes. Tests relying on these data files will be skipped when using ``pandas.test()``. (:issue:`19320`) + .. _whatsnew_0232.bug_fixes: Bug Fixes diff --git a/pandas/conftest.py b/pandas/conftest.py index 9d806a91f37f7..ead357747666d 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1,5 +1,8 @@ +import os + import pytest +import pandas import numpy as np import pandas as pd from pandas.compat import PY3 @@ -15,6 +18,8 @@ def pytest_addoption(parser): help="run high memory tests") parser.addoption("--only-slow", action="store_true", help="run only slow tests") + parser.addoption("--strict-data-files", action="store_true", + help="Fail if a test is skipped for missing data file.") def pytest_runtest_setup(item): @@ -129,6 +134,43 @@ def join_type(request): return request.param +@pytest.fixture +def datapath(request): + """Get the path to a data file. + + Parameters + ---------- + path : str + Path to the file, relative to ``pandas/tests/`` + + Returns + ------- + path : path including ``pandas/tests``. + + Raises + ------ + ValueError + If the path doesn't exist and the --strict-data-files option is set. + """ + def deco(*args): + path = os.path.join('pandas', 'tests', *args) + if not os.path.exists(path): + if request.config.getoption("--strict-data-files"): + msg = "Could not find file {} and --strict-data-files is set." + raise ValueError(msg.format(path)) + else: + msg = "Could not find {}." 
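+                # without --strict-data-files, a missing data file skips
+                # the test rather than failing it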
+ pytest.skip(msg.format(path)) + return path + return deco + + +@pytest.fixture +def iris(datapath): + """The iris dataset as a DataFrame.""" + return pandas.read_csv(datapath('data', 'iris.csv')) + + @pytest.fixture(params=['nlargest', 'nsmallest']) def nselect_method(request): """ diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index df506ae9486ee..3ede83b5969ce 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -1181,12 +1181,12 @@ def test_iter(self): ('baz', 'two'), ('qux', 'one'), ('qux', 'two')] assert result == expected - def test_legacy_pickle(self): + def test_legacy_pickle(self, datapath): if PY3: pytest.skip("testing for legacy pickles not " "support on py3") - path = tm.get_data_path('multiindex_v1.pickle') + path = datapath('indexes', 'data', 'multiindex_v1.pickle') obj = pd.read_pickle(path) obj2 = MultiIndex.from_tuples(obj.values) @@ -1202,10 +1202,10 @@ def test_legacy_pickle(self): assert_almost_equal(res, exp) assert_almost_equal(exp, exp2) - def test_legacy_v2_unpickle(self): + def test_legacy_v2_unpickle(self, datapath): # 0.7.3 -> 0.8.0 format manage - path = tm.get_data_path('mindex_073.pickle') + path = datapath('indexes', 'data', 'mindex_073.pickle') obj = pd.read_pickle(path) obj2 = MultiIndex.from_tuples(obj.values) diff --git a/pandas/tests/io/conftest.py b/pandas/tests/io/conftest.py index 8deb51e190bab..7623587803b41 100644 --- a/pandas/tests/io/conftest.py +++ b/pandas/tests/io/conftest.py @@ -1,32 +1,23 @@ -import os - import pytest from pandas.io.parsers import read_table -from pandas.util import testing as tm - - -@pytest.fixture -def parser_data(request): - return os.path.join(tm.get_data_path(), '..', 'parser', 'data') @pytest.fixture -def tips_file(parser_data): +def tips_file(datapath): """Path to the tips dataset""" - return os.path.join(parser_data, 'tips.csv') + return datapath('io', 'parser', 'data', 'tips.csv') @pytest.fixture -def jsonl_file(parser_data): +def jsonl_file(datapath): """Path a JSONL dataset""" - return os.path.join(parser_data, 'items.jsonl') + return datapath('io', 'parser', 'data', 'items.jsonl') @pytest.fixture -def salaries_table(parser_data): +def salaries_table(datapath): """DataFrame with the salaries dataset""" - path = os.path.join(parser_data, 'salaries.csv') - return read_table(path) + return read_table(datapath('io', 'parser', 'data', 'salaries.csv')) @pytest.fixture diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index f221df93dd412..63b7cb3459069 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -916,8 +916,8 @@ def test_unicode_problem_decoding_as_ascii(self): dm = DataFrame({u('c/\u03c3'): Series({'test': np.nan})}) compat.text_type(dm.to_string()) - def test_string_repr_encoding(self): - filepath = tm.get_data_path('unicode_series.csv') + def test_string_repr_encoding(self, datapath): + filepath = datapath('io', 'formats', 'data', 'unicode_series.csv') df = pd.read_csv(filepath, header=None, encoding='latin1') repr(df) repr(df[1]) diff --git a/pandas/tests/io/json/test_compression.py b/pandas/tests/io/json/test_compression.py index c9074ca49e5be..05ceace20f5a4 100644 --- a/pandas/tests/io/json/test_compression.py +++ b/pandas/tests/io/json/test_compression.py @@ -21,11 +21,11 @@ def test_compression_roundtrip(compression): assert_frame_equal(df, pd.read_json(result)) -def test_read_zipped_json(): - uncompressed_path = 
tm.get_data_path("tsframe_v012.json") +def test_read_zipped_json(datapath): + uncompressed_path = datapath("io", "json", "data", "tsframe_v012.json") uncompressed_df = pd.read_json(uncompressed_path) - compressed_path = tm.get_data_path("tsframe_v012.json.zip") + compressed_path = datapath("io", "json", "data", "tsframe_v012.json.zip") compressed_df = pd.read_json(compressed_path, compression='zip') assert_frame_equal(uncompressed_df, compressed_df) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 7e497c395266f..bcbac4400c953 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -37,8 +37,9 @@ class TestPandasContainer(object): - def setup_method(self, method): - self.dirpath = tm.get_data_path() + @pytest.fixture(scope="function", autouse=True) + def setup(self, datapath): + self.dirpath = datapath("io", "json", "data") self.ts = tm.makeTimeSeries() self.ts.name = 'ts' @@ -59,7 +60,8 @@ def setup_method(self, method): self.mixed_frame = _mixed_frame.copy() self.categorical = _cat_frame.copy() - def teardown_method(self, method): + yield + del self.dirpath del self.ts diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index b39122e5e7906..fb510f1a74556 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -77,7 +77,7 @@ def test_read_csv(self): else: prefix = u("file://") - fname = prefix + compat.text_type(self.csv1) + fname = prefix + compat.text_type(os.path.abspath(self.csv1)) self.read_csv(fname, index_col=0, parse_dates=True) def test_1000_sep(self): @@ -651,21 +651,19 @@ def test_read_csv_parse_simple_list(self): tm.assert_frame_equal(df, expected) @tm.network - def test_url(self): + def test_url(self, datapath): # HTTP(S) url = ('https://raw.github.com/pandas-dev/pandas/master/' 'pandas/tests/io/parser/data/salaries.csv') url_table = self.read_table(url) - dirpath = tm.get_data_path() - localtable = os.path.join(dirpath, 'salaries.csv') + localtable = datapath('io', 'parser', 'data', 'salaries.csv') local_table = self.read_table(localtable) tm.assert_frame_equal(url_table, local_table) # TODO: ftp testing @pytest.mark.slow - def test_file(self): - dirpath = tm.get_data_path() - localtable = os.path.join(dirpath, 'salaries.csv') + def test_file(self, datapath): + localtable = datapath('io', 'parser', 'data', 'salaries.csv') local_table = self.read_table(localtable) try: @@ -755,8 +753,8 @@ def test_utf16_bom_skiprows(self): tm.assert_frame_equal(result, expected) - def test_utf16_example(self): - path = tm.get_data_path('utf16_ex.txt') + def test_utf16_example(self, datapath): + path = datapath('io', 'parser', 'data', 'utf16_ex.txt') # it works! 
and is the right length result = self.read_table(path, encoding='utf-16') @@ -767,8 +765,8 @@ def test_utf16_example(self): result = self.read_table(buf, encoding='utf-16') assert len(result) == 50 - def test_unicode_encoding(self): - pth = tm.get_data_path('unicode_series.csv') + def test_unicode_encoding(self, datapath): + pth = datapath('io', 'parser', 'data', 'unicode_series.csv') result = self.read_csv(pth, header=None, encoding='latin-1') result = result.set_index(0) @@ -1513,10 +1511,9 @@ def test_internal_eof_byte_to_file(self): result = self.read_csv(path) tm.assert_frame_equal(result, expected) - def test_sub_character(self): + def test_sub_character(self, datapath): # see gh-16893 - dirpath = tm.get_data_path() - filename = os.path.join(dirpath, "sub_char.csv") + filename = datapath('io', 'parser', 'data', 'sub_char.csv') expected = DataFrame([[1, 2, 3]], columns=["a", "\x1ab", "c"]) result = self.read_csv(filename) diff --git a/pandas/tests/io/parser/compression.py b/pandas/tests/io/parser/compression.py index e84db66561c49..e4950af19ea95 100644 --- a/pandas/tests/io/parser/compression.py +++ b/pandas/tests/io/parser/compression.py @@ -120,9 +120,9 @@ def test_read_csv_infer_compression(self): tm.assert_frame_equal(expected, df) - def test_read_csv_compressed_utf16_example(self): + def test_read_csv_compressed_utf16_example(self, datapath): # GH18071 - path = tm.get_data_path('utf16_ex_small.zip') + path = datapath('io', 'parser', 'data', 'utf16_ex_small.zip') result = self.read_csv(path, encoding='utf-16', compression='zip', sep='\t') diff --git a/pandas/tests/io/parser/dtypes.py b/pandas/tests/io/parser/dtypes.py index b91ce04673e29..8060ebf2fbcd4 100644 --- a/pandas/tests/io/parser/dtypes.py +++ b/pandas/tests/io/parser/dtypes.py @@ -125,9 +125,9 @@ def test_categorical_dtype_high_cardinality_numeric(self): np.sort(actual.a.cat.categories), ordered=True) tm.assert_frame_equal(actual, expected) - def test_categorical_dtype_encoding(self): + def test_categorical_dtype_encoding(self, datapath): # GH 10153 - pth = tm.get_data_path('unicode_series.csv') + pth = datapath('io', 'parser', 'data', 'unicode_series.csv') encoding = 'latin-1' expected = self.read_csv(pth, header=None, encoding=encoding) expected[1] = Categorical(expected[1]) @@ -135,7 +135,7 @@ def test_categorical_dtype_encoding(self): dtype={1: 'category'}) tm.assert_frame_equal(actual, expected) - pth = tm.get_data_path('utf16_ex.txt') + pth = datapath('io', 'parser', 'data', 'utf16_ex.txt') encoding = 'utf-16' expected = self.read_table(pth, encoding=encoding) expected = expected.apply(Categorical) diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py index fdf45f307e953..e2243b8087a5b 100644 --- a/pandas/tests/io/parser/test_network.py +++ b/pandas/tests/io/parser/test_network.py @@ -48,10 +48,16 @@ def check_compressed_urls(salaries_table, compression, extension, mode, tm.assert_frame_equal(url_table, salaries_table) +@pytest.fixture +def tips_df(datapath): + """DataFrame with the tips dataset.""" + return read_csv(datapath('io', 'parser', 'data', 'tips.csv')) + + @pytest.mark.usefixtures("s3_resource") class TestS3(object): - def test_parse_public_s3_bucket(self): + def test_parse_public_s3_bucket(self, tips_df): pytest.importorskip('s3fs') # more of an integration test due to the not-public contents portion # can probably mock this though. 
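
The comment above notes that these S3 reads "can probably mock this
though". A minimal sketch of that idea, assuming the third-party moto and
boto3 packages are available; the bucket name and CSV body here are
hypothetical illustrations, not part of this patch:

    import boto3
    import pandas as pd
    from moto import mock_s3

    @mock_s3
    def test_parse_mocked_s3_bucket():
        # moto patches botocore in-process: the bucket below exists only in
        # memory, so no network access or AWS credentials are needed.
        client = boto3.client('s3', region_name='us-east-1')
        client.create_bucket(Bucket='pandas-test')
        client.put_object(Bucket='pandas-test', Key='tips.csv',
                          Body=b'total_bill,tip\n16.99,1.01\n10.34,1.66\n')
        df = pd.read_csv('s3://pandas-test/tips.csv')  # requires s3fs
        assert not df.empty
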
@@ -60,45 +66,40 @@ def test_parse_public_s3_bucket(self): ext, compression=comp) assert isinstance(df, DataFrame) assert not df.empty - tm.assert_frame_equal(read_csv( - tm.get_data_path('tips.csv')), df) + tm.assert_frame_equal(df, tips_df) # Read public file from bucket with not-public contents df = read_csv('s3://cant_get_it/tips.csv') assert isinstance(df, DataFrame) assert not df.empty - tm.assert_frame_equal(read_csv(tm.get_data_path('tips.csv')), df) + tm.assert_frame_equal(df, tips_df) - def test_parse_public_s3n_bucket(self): + def test_parse_public_s3n_bucket(self, tips_df): # Read from AWS s3 as "s3n" URL df = read_csv('s3n://pandas-test/tips.csv', nrows=10) assert isinstance(df, DataFrame) assert not df.empty - tm.assert_frame_equal(read_csv( - tm.get_data_path('tips.csv')).iloc[:10], df) + tm.assert_frame_equal(tips_df.iloc[:10], df) - def test_parse_public_s3a_bucket(self): + def test_parse_public_s3a_bucket(self, tips_df): # Read from AWS s3 as "s3a" URL df = read_csv('s3a://pandas-test/tips.csv', nrows=10) assert isinstance(df, DataFrame) assert not df.empty - tm.assert_frame_equal(read_csv( - tm.get_data_path('tips.csv')).iloc[:10], df) + tm.assert_frame_equal(tips_df.iloc[:10], df) - def test_parse_public_s3_bucket_nrows(self): + def test_parse_public_s3_bucket_nrows(self, tips_df): for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: df = read_csv('s3://pandas-test/tips.csv' + ext, nrows=10, compression=comp) assert isinstance(df, DataFrame) assert not df.empty - tm.assert_frame_equal(read_csv( - tm.get_data_path('tips.csv')).iloc[:10], df) + tm.assert_frame_equal(tips_df.iloc[:10], df) - def test_parse_public_s3_bucket_chunked(self): + def test_parse_public_s3_bucket_chunked(self, tips_df): # Read with a chunksize chunksize = 5 - local_tips = read_csv(tm.get_data_path('tips.csv')) for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: df_reader = read_csv('s3://pandas-test/tips.csv' + ext, chunksize=chunksize, compression=comp) @@ -109,14 +110,13 @@ def test_parse_public_s3_bucket_chunked(self): df = df_reader.get_chunk() assert isinstance(df, DataFrame) assert not df.empty - true_df = local_tips.iloc[ + true_df = tips_df.iloc[ chunksize * i_chunk: chunksize * (i_chunk + 1)] tm.assert_frame_equal(true_df, df) - def test_parse_public_s3_bucket_chunked_python(self): + def test_parse_public_s3_bucket_chunked_python(self, tips_df): # Read with a chunksize using the Python parser chunksize = 5 - local_tips = read_csv(tm.get_data_path('tips.csv')) for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: df_reader = read_csv('s3://pandas-test/tips.csv' + ext, chunksize=chunksize, compression=comp, @@ -127,36 +127,33 @@ def test_parse_public_s3_bucket_chunked_python(self): df = df_reader.get_chunk() assert isinstance(df, DataFrame) assert not df.empty - true_df = local_tips.iloc[ + true_df = tips_df.iloc[ chunksize * i_chunk: chunksize * (i_chunk + 1)] tm.assert_frame_equal(true_df, df) - def test_parse_public_s3_bucket_python(self): + def test_parse_public_s3_bucket_python(self, tips_df): for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python', compression=comp) assert isinstance(df, DataFrame) assert not df.empty - tm.assert_frame_equal(read_csv( - tm.get_data_path('tips.csv')), df) + tm.assert_frame_equal(df, tips_df) - def test_infer_s3_compression(self): + def test_infer_s3_compression(self, tips_df): for ext in ['', '.gz', '.bz2']: df = 
read_csv('s3://pandas-test/tips.csv' + ext, engine='python', compression='infer') assert isinstance(df, DataFrame) assert not df.empty - tm.assert_frame_equal(read_csv( - tm.get_data_path('tips.csv')), df) + tm.assert_frame_equal(df, tips_df) - def test_parse_public_s3_bucket_nrows_python(self): + def test_parse_public_s3_bucket_nrows_python(self, tips_df): for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python', nrows=10, compression=comp) assert isinstance(df, DataFrame) assert not df.empty - tm.assert_frame_equal(read_csv( - tm.get_data_path('tips.csv')).iloc[:10], df) + tm.assert_frame_equal(tips_df.iloc[:10], df) def test_s3_fails(self): with pytest.raises(IOError): diff --git a/pandas/tests/io/parser/test_parsers.py b/pandas/tests/io/parser/test_parsers.py index 7717102b64fc5..b6f13039641a2 100644 --- a/pandas/tests/io/parser/test_parsers.py +++ b/pandas/tests/io/parser/test_parsers.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- import os +import pytest import pandas.util.testing as tm from pandas import read_csv, read_table, DataFrame @@ -45,8 +46,9 @@ def read_table(self, *args, **kwargs): def float_precision_choices(self): raise com.AbstractMethodError(self) - def setup_method(self, method): - self.dirpath = tm.get_data_path() + @pytest.fixture(autouse=True) + def setup_method(self, datapath): + self.dirpath = datapath('io', 'parser', 'data') self.csv1 = os.path.join(self.dirpath, 'test1.csv') self.csv2 = os.path.join(self.dirpath, 'test2.csv') self.xls1 = os.path.join(self.dirpath, 'test.xls') diff --git a/pandas/tests/io/parser/test_textreader.py b/pandas/tests/io/parser/test_textreader.py index e8d9d8b52164b..c7026e3e0fc88 100644 --- a/pandas/tests/io/parser/test_textreader.py +++ b/pandas/tests/io/parser/test_textreader.py @@ -28,8 +28,9 @@ class TestTextReader(object): - def setup_method(self, method): - self.dirpath = tm.get_data_path() + @pytest.fixture(autouse=True) + def setup_method(self, datapath): + self.dirpath = datapath('io', 'parser', 'data') self.csv1 = os.path.join(self.dirpath, 'test1.csv') self.csv2 = os.path.join(self.dirpath, 'test2.csv') self.xls1 = os.path.join(self.dirpath, 'test.xls') diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index b80263021c269..101ee3e619f5b 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -11,8 +11,9 @@ class TestSAS7BDAT(object): - def setup_method(self, method): - self.dirpath = tm.get_data_path() + @pytest.fixture(autouse=True) + def setup_method(self, datapath): + self.dirpath = datapath("io", "sas", "data") self.data = [] self.test_ix = [list(range(1, 16)), [16]] for j in 1, 2: @@ -123,9 +124,8 @@ def test_iterator_read_too_much(self): rdr.close() -def test_encoding_options(): - dirpath = tm.get_data_path() - fname = os.path.join(dirpath, "test1.sas7bdat") +def test_encoding_options(datapath): + fname = datapath("io", "sas", "data", "test1.sas7bdat") df1 = pd.read_sas(fname) df2 = pd.read_sas(fname, encoding='utf-8') for col in df1.columns: @@ -143,43 +143,39 @@ def test_encoding_options(): assert(x == y.decode()) -def test_productsales(): - dirpath = tm.get_data_path() - fname = os.path.join(dirpath, "productsales.sas7bdat") +def test_productsales(datapath): + fname = datapath("io", "sas", "data", "productsales.sas7bdat") df = pd.read_sas(fname, encoding='utf-8') - fname = os.path.join(dirpath, "productsales.csv") + fname = datapath("io", "sas", "data", 
"productsales.csv") df0 = pd.read_csv(fname, parse_dates=['MONTH']) vn = ["ACTUAL", "PREDICT", "QUARTER", "YEAR"] df0[vn] = df0[vn].astype(np.float64) tm.assert_frame_equal(df, df0) -def test_12659(): - dirpath = tm.get_data_path() - fname = os.path.join(dirpath, "test_12659.sas7bdat") +def test_12659(datapath): + fname = datapath("io", "sas", "data", "test_12659.sas7bdat") df = pd.read_sas(fname) - fname = os.path.join(dirpath, "test_12659.csv") + fname = datapath("io", "sas", "data", "test_12659.csv") df0 = pd.read_csv(fname) df0 = df0.astype(np.float64) tm.assert_frame_equal(df, df0) -def test_airline(): - dirpath = tm.get_data_path() - fname = os.path.join(dirpath, "airline.sas7bdat") +def test_airline(datapath): + fname = datapath("io", "sas", "data", "airline.sas7bdat") df = pd.read_sas(fname) - fname = os.path.join(dirpath, "airline.csv") + fname = datapath("io", "sas", "data", "airline.csv") df0 = pd.read_csv(fname) df0 = df0.astype(np.float64) tm.assert_frame_equal(df, df0, check_exact=False) -def test_date_time(): +def test_date_time(datapath): # Support of different SAS date/datetime formats (PR #15871) - dirpath = tm.get_data_path() - fname = os.path.join(dirpath, "datetime.sas7bdat") + fname = datapath("io", "sas", "data", "datetime.sas7bdat") df = pd.read_sas(fname) - fname = os.path.join(dirpath, "datetime.csv") + fname = datapath("io", "sas", "data", "datetime.csv") df0 = pd.read_csv(fname, parse_dates=['Date1', 'Date2', 'DateTime', 'DateTimeHi', 'Taiw']) # GH 19732: Timestamps imported from sas will incur floating point errors @@ -187,9 +183,8 @@ def test_date_time(): tm.assert_frame_equal(df, df0) -def test_zero_variables(): +def test_zero_variables(datapath): # Check if the SAS file has zero variables (PR #18184) - dirpath = tm.get_data_path() - fname = os.path.join(dirpath, "zero_variables.sas7bdat") + fname = datapath("io", "sas", "data", "zero_variables.sas7bdat") with pytest.raises(EmptyDataError): pd.read_sas(fname) diff --git a/pandas/tests/io/sas/test_xport.py b/pandas/tests/io/sas/test_xport.py index de31c3e36a8d5..6e5b2ab067aa5 100644 --- a/pandas/tests/io/sas/test_xport.py +++ b/pandas/tests/io/sas/test_xport.py @@ -1,3 +1,4 @@ +import pytest import pandas as pd import pandas.util.testing as tm from pandas.io.sas.sasreader import read_sas @@ -18,8 +19,9 @@ def numeric_as_float(data): class TestXport(object): - def setup_method(self, method): - self.dirpath = tm.get_data_path() + @pytest.fixture(autouse=True) + def setup_method(self, datapath): + self.dirpath = datapath("io", "sas", "data") self.file01 = os.path.join(self.dirpath, "DEMO_G.xpt") self.file02 = os.path.join(self.dirpath, "SSHSV1_A.xpt") self.file03 = os.path.join(self.dirpath, "DRXFCD_G.xpt") diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index a89156db38ae3..5c9739be73393 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -149,27 +149,22 @@ def test_read_non_existant(self, reader, module, error_class, fn_ext): reader(path) @pytest.mark.parametrize('reader, module, path', [ - (pd.read_csv, 'os', os.path.join(HERE, 'data', 'iris.csv')), - (pd.read_table, 'os', os.path.join(HERE, 'data', 'iris.csv')), - (pd.read_fwf, 'os', os.path.join(HERE, 'data', - 'fixed_width_format.txt')), - (pd.read_excel, 'xlrd', os.path.join(HERE, 'data', 'test1.xlsx')), - (pd.read_feather, 'feather', os.path.join(HERE, 'data', - 'feather-0_3_1.feather')), - (pd.read_hdf, 'tables', os.path.join(HERE, 'data', 'legacy_hdf', - 'datetimetz_object.h5')), - 
(pd.read_stata, 'os', os.path.join(HERE, 'data', 'stata10_115.dta')), - (pd.read_sas, 'os', os.path.join(HERE, 'sas', 'data', - 'test1.sas7bdat')), - (pd.read_json, 'os', os.path.join(HERE, 'json', 'data', - 'tsframe_v012.json')), - (pd.read_msgpack, 'os', os.path.join(HERE, 'msgpack', 'data', - 'frame.mp')), - (pd.read_pickle, 'os', os.path.join(HERE, 'data', - 'categorical_0_14_1.pickle')), + (pd.read_csv, 'os', ('io', 'data', 'iris.csv')), + (pd.read_table, 'os', ('io', 'data', 'iris.csv')), + (pd.read_fwf, 'os', ('io', 'data', 'fixed_width_format.txt')), + (pd.read_excel, 'xlrd', ('io', 'data', 'test1.xlsx')), + (pd.read_feather, 'feather', ('io', 'data', 'feather-0_3_1.feather')), + (pd.read_hdf, 'tables', ('io', 'data', 'legacy_hdf', + 'datetimetz_object.h5')), + (pd.read_stata, 'os', ('io', 'data', 'stata10_115.dta')), + (pd.read_sas, 'os', ('io', 'sas', 'data', 'test1.sas7bdat')), + (pd.read_json, 'os', ('io', 'json', 'data', 'tsframe_v012.json')), + (pd.read_msgpack, 'os', ('io', 'msgpack', 'data', 'frame.mp')), + (pd.read_pickle, 'os', ('io', 'data', 'categorical_0_14_1.pickle')), ]) - def test_read_fspath_all(self, reader, module, path): + def test_read_fspath_all(self, reader, module, path, datapath): pytest.importorskip(module) + path = datapath(*path) mypath = CustomFSPath(path) result = reader(mypath) @@ -232,13 +227,14 @@ def test_write_fspath_hdf5(self): tm.assert_frame_equal(result, expected) -class TestMMapWrapper(object): +@pytest.fixture +def mmap_file(datapath): + return datapath('io', 'data', 'test_mmap.csv') + - def setup_method(self, method): - self.mmap_file = os.path.join(tm.get_data_path(), - 'test_mmap.csv') +class TestMMapWrapper(object): - def test_constructor_bad_file(self): + def test_constructor_bad_file(self, mmap_file): non_file = StringIO('I am not a file') non_file.fileno = lambda: -1 @@ -252,15 +248,15 @@ def test_constructor_bad_file(self): tm.assert_raises_regex(err, msg, common.MMapWrapper, non_file) - target = open(self.mmap_file, 'r') + target = open(mmap_file, 'r') target.close() msg = "I/O operation on closed file" tm.assert_raises_regex( ValueError, msg, common.MMapWrapper, target) - def test_get_attr(self): - with open(self.mmap_file, 'r') as target: + def test_get_attr(self, mmap_file): + with open(mmap_file, 'r') as target: wrapper = common.MMapWrapper(target) attrs = dir(wrapper.mmap) @@ -273,8 +269,8 @@ def test_get_attr(self): assert not hasattr(wrapper, 'foo') - def test_next(self): - with open(self.mmap_file, 'r') as target: + def test_next(self, mmap_file): + with open(mmap_file, 'r') as target: wrapper = common.MMapWrapper(target) lines = target.readlines() diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 05423474f330a..4e2b2af0ebfe7 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -39,8 +39,9 @@ @td.skip_if_no('xlrd', '0.9') class SharedItems(object): - def setup_method(self, method): - self.dirpath = tm.get_data_path() + @pytest.fixture(autouse=True) + def setup_method(self, datapath): + self.dirpath = datapath("io", "data") self.frame = _frame.copy() self.frame2 = _frame2.copy() self.tsframe = _tsframe.copy() @@ -49,7 +50,6 @@ def setup_method(self, method): def get_csv_refdf(self, basename): """ Obtain the reference data from read_csv with the Python engine. 
- Test data path is defined by pandas.util.testing.get_data_path() Parameters ---------- @@ -68,8 +68,7 @@ def get_csv_refdf(self, basename): def get_excelfile(self, basename, ext): """ - Return test data ExcelFile instance. Test data path is defined by - pandas.util.testing.get_data_path() + Return test data ExcelFile instance. Parameters ---------- @@ -86,8 +85,7 @@ def get_excelfile(self, basename, ext): def get_exceldf(self, basename, ext, *args, **kwds): """ - Return test data DataFrame. Test data path is defined by - pandas.util.testing.get_data_path() + Return test data DataFrame. Parameters ---------- diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index a56946b82b027..9c6a8de7ed446 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -1,6 +1,5 @@ from __future__ import print_function -import glob import os import re import threading @@ -25,8 +24,18 @@ import pandas.util._test_decorators as td from pandas.util.testing import makeCustomDataframe as mkdf, network +HERE = os.path.dirname(__file__) -DATA_PATH = tm.get_data_path() + +@pytest.fixture(params=[ + 'chinese_utf-16.html', + 'chinese_utf-32.html', + 'chinese_utf-8.html', + 'letz_latin1.html', +]) +def html_encoding_file(request, datapath): + """Parametrized fixture for HTML encoding test filenames.""" + return datapath('io', 'data', 'html_encoding', request.param) def assert_framelist_equal(list1, list2, *args, **kwargs): @@ -44,11 +53,11 @@ def assert_framelist_equal(list1, list2, *args, **kwargs): @td.skip_if_no('bs4') -def test_bs4_version_fails(monkeypatch): +def test_bs4_version_fails(monkeypatch, datapath): import bs4 monkeypatch.setattr(bs4, '__version__', '4.2') with tm.assert_raises_regex(ValueError, "minimum version"): - read_html(os.path.join(DATA_PATH, "spam.html"), flavor='bs4') + read_html(datapath("io", "data", "spam.html"), flavor='bs4') def test_invalid_flavor(): @@ -59,8 +68,8 @@ def test_invalid_flavor(): @td.skip_if_no('bs4') @td.skip_if_no('lxml') -def test_same_ordering(): - filename = os.path.join(DATA_PATH, 'valid_markup.html') +def test_same_ordering(datapath): + filename = datapath('io', 'data', 'valid_markup.html') dfs_lxml = read_html(filename, index_col=0, flavor=['lxml']) dfs_bs4 = read_html(filename, index_col=0, flavor=['bs4']) assert_framelist_equal(dfs_lxml, dfs_bs4) @@ -72,11 +81,14 @@ def test_same_ordering(): pytest.param('lxml', marks=pytest.mark.skipif( not td.safe_import('lxml'), reason='No lxml'))], scope="class") class TestReadHtml(object): - spam_data = os.path.join(DATA_PATH, 'spam.html') - spam_data_kwargs = {} - if PY3: - spam_data_kwargs['encoding'] = 'UTF-8' - banklist_data = os.path.join(DATA_PATH, 'banklist.html') + + @pytest.fixture(autouse=True) + def set_files(self, datapath): + self.spam_data = datapath('io', 'data', 'spam.html') + self.spam_data_kwargs = {} + if PY3: + self.spam_data_kwargs['encoding'] = 'UTF-8' + self.banklist_data = datapath("io", "data", "banklist.html") @pytest.fixture(autouse=True, scope="function") def set_defaults(self, flavor, request): @@ -272,7 +284,8 @@ def test_invalid_url(self): @pytest.mark.slow def test_file_url(self): url = self.banklist_data - dfs = self.read_html(file_path_to_url(url), 'First', + dfs = self.read_html(file_path_to_url(os.path.abspath(url)), + 'First', attrs={'id': 'table'}) assert isinstance(dfs, list) for df in dfs: @@ -326,7 +339,7 @@ def test_multiindex_header_index_skiprows(self): @pytest.mark.slow def test_regex_idempotency(self): url = self.banklist_data - dfs = 
self.read_html(file_path_to_url(url), + dfs = self.read_html(file_path_to_url(os.path.abspath(url)), match=re.compile(re.compile('Florida')), attrs={'id': 'table'}) assert isinstance(dfs, list) @@ -352,9 +365,9 @@ def test_python_docs_table(self): assert sorted(zz) == sorted(['Repo', 'What']) @pytest.mark.slow - def test_thousands_macau_stats(self): + def test_thousands_macau_stats(self, datapath): all_non_nan_table_index = -2 - macau_data = os.path.join(DATA_PATH, 'macau.html') + macau_data = datapath("io", "data", "macau.html") dfs = self.read_html(macau_data, index_col=0, attrs={'class': 'style1'}) df = dfs[all_non_nan_table_index] @@ -362,9 +375,9 @@ def test_thousands_macau_stats(self): assert not any(s.isna().any() for _, s in df.iteritems()) @pytest.mark.slow - def test_thousands_macau_index_col(self): + def test_thousands_macau_index_col(self, datapath): all_non_nan_table_index = -2 - macau_data = os.path.join(DATA_PATH, 'macau.html') + macau_data = datapath('io', 'data', 'macau.html') dfs = self.read_html(macau_data, index_col=0, header=0) df = dfs[all_non_nan_table_index] @@ -518,8 +531,8 @@ def test_countries_municipalities(self): res2 = self.read_html(data2, header=0) assert_framelist_equal(res1, res2) - def test_nyse_wsj_commas_table(self): - data = os.path.join(DATA_PATH, 'nyse_wsj.html') + def test_nyse_wsj_commas_table(self, datapath): + data = datapath('io', 'data', 'nyse_wsj.html') df = self.read_html(data, index_col=0, header=0, attrs={'class': 'mdcTable'})[0] @@ -530,7 +543,7 @@ def test_nyse_wsj_commas_table(self): tm.assert_index_equal(df.columns, columns) @pytest.mark.slow - def test_banklist_header(self): + def test_banklist_header(self, datapath): from pandas.io.html import _remove_whitespace def try_remove_ws(x): @@ -541,7 +554,7 @@ def try_remove_ws(x): df = self.read_html(self.banklist_data, 'Metcalf', attrs={'id': 'table'})[0] - ground_truth = read_csv(os.path.join(DATA_PATH, 'banklist.csv'), + ground_truth = read_csv(datapath('io', 'data', 'banklist.csv'), converters={'Updated Date': Timestamp, 'Closing Date': Timestamp}) assert df.shape == ground_truth.shape @@ -658,19 +671,19 @@ def test_parse_dates_combine(self): newdf = DataFrame({'datetime': raw_dates}) tm.assert_frame_equal(newdf, res[0]) - def test_computer_sales_page(self): - data = os.path.join(DATA_PATH, 'computer_sales_page.html') + def test_computer_sales_page(self, datapath): + data = datapath('io', 'data', 'computer_sales_page.html') with tm.assert_raises_regex(ParserError, r"Passed header=\[0,1\] are " r"too many rows for this " r"multi_index of columns"): self.read_html(data, header=[0, 1]) - data = os.path.join(DATA_PATH, 'computer_sales_page.html') + data = datapath('io', 'data', 'computer_sales_page.html') assert self.read_html(data, header=[1, 2]) - def test_wikipedia_states_table(self): - data = os.path.join(DATA_PATH, 'wikipedia_states.html') + def test_wikipedia_states_table(self, datapath): + data = datapath('io', 'data', 'wikipedia_states.html') assert os.path.isfile(data), '%r is not a file' % data assert os.path.getsize(data), '%r is an empty file' % data result = self.read_html(data, 'Arizona', header=1)[0] @@ -784,15 +797,15 @@ def test_multiple_header_rows(self): html_df = read_html(html, )[0] tm.assert_frame_equal(expected_df, html_df) - def test_works_on_valid_markup(self): - filename = os.path.join(DATA_PATH, 'valid_markup.html') + def test_works_on_valid_markup(self, datapath): + filename = datapath('io', 'data', 'valid_markup.html') dfs = self.read_html(filename, index_col=0) 
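         # read_html returns a list with one DataFrame per matched <table>,
         # hence the list/DataFrame assertions below.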
assert isinstance(dfs, list) assert isinstance(dfs[0], DataFrame) @pytest.mark.slow - def test_fallback_success(self): - banklist_data = os.path.join(DATA_PATH, 'banklist.html') + def test_fallback_success(self, datapath): + banklist_data = datapath('io', 'data', 'banklist.html') self.read_html(banklist_data, '.*Water.*', flavor=['lxml', 'html5lib']) def test_to_html_timestamp(self): @@ -835,22 +848,23 @@ def test_displayed_only(self, displayed_only, exp0, exp1): else: assert len(dfs) == 1 # Should not parse hidden table - @pytest.mark.parametrize("f", glob.glob( - os.path.join(DATA_PATH, 'html_encoding', '*.html'))) - def test_encode(self, f): - _, encoding = os.path.splitext(os.path.basename(f))[0].split('_') + def test_encode(self, html_encoding_file): + _, encoding = os.path.splitext( + os.path.basename(html_encoding_file) + )[0].split('_') try: - with open(f, 'rb') as fobj: + with open(html_encoding_file, 'rb') as fobj: from_string = self.read_html(fobj.read(), encoding=encoding, index_col=0).pop() - with open(f, 'rb') as fobj: + with open(html_encoding_file, 'rb') as fobj: from_file_like = self.read_html(BytesIO(fobj.read()), encoding=encoding, index_col=0).pop() - from_filename = self.read_html(f, encoding=encoding, + from_filename = self.read_html(html_encoding_file, + encoding=encoding, index_col=0).pop() tm.assert_frame_equal(from_string, from_file_like) tm.assert_frame_equal(from_string, from_filename) @@ -906,7 +920,7 @@ def seekable(self): assert self.read_html(bad) @pytest.mark.slow - def test_importcheck_thread_safety(self): + def test_importcheck_thread_safety(self, datapath): # see gh-16928 class ErrorThread(threading.Thread): @@ -921,7 +935,7 @@ def run(self): # force import check by reinitalising global vars in html.py reload(pandas.io.html) - filename = os.path.join(DATA_PATH, 'valid_markup.html') + filename = datapath('io', 'data', 'valid_markup.html') helper_thread1 = ErrorThread(target=self.read_html, args=(filename,)) helper_thread2 = ErrorThread(target=self.read_html, args=(filename,)) diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py index cfac77291803d..491d5fe33cc33 100644 --- a/pandas/tests/io/test_packers.py +++ b/pandas/tests/io/test_packers.py @@ -3,6 +3,7 @@ from warnings import catch_warnings import os import datetime +import glob import numpy as np from distutils.version import LooseVersion @@ -837,13 +838,13 @@ def test_default_encoding(self): assert_frame_equal(result, frame) -def legacy_packers_versions(): - # yield the packers versions - path = tm.get_data_path('legacy_msgpack') - for v in os.listdir(path): - p = os.path.join(path, v) - if os.path.isdir(p): - yield v +files = glob.glob(os.path.join(os.path.dirname(__file__), "data", + "legacy_msgpack", "*", "*.msgpack")) + + +@pytest.fixture(params=files) +def legacy_packer(request, datapath): + return datapath(request.param) class TestMsgpack(object): @@ -920,24 +921,20 @@ def compare_frame_dt_mixed_tzs(self, result, expected, typ, version): else: tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize('version', legacy_packers_versions()) def test_msgpacks_legacy(self, current_packers_data, all_packers_data, - version): - - pth = tm.get_data_path('legacy_msgpack/{0}'.format(version)) - n = 0 - for f in os.listdir(pth): - # GH12142 0.17 files packed in P2 can't be read in P3 - if (compat.PY3 and version.startswith('0.17.') and - f.split('.')[-4][-1] == '2'): - continue - vf = os.path.join(pth, f) - try: - with catch_warnings(record=True): - 
self.compare(current_packers_data, all_packers_data, - vf, version) - except ImportError: - # blosc not installed - continue - n += 1 - assert n > 0, 'Msgpack files are not tested' + legacy_packer, datapath): + + version = os.path.basename(os.path.dirname(legacy_packer)) + + # GH12142 0.17 files packed in P2 can't be read in P3 + if (compat.PY3 and version.startswith('0.17.') and + legacy_packer.split('.')[-4][-1] == '2'): + msg = "Files packed in Py2 can't be read in Py3 ({})" + pytest.skip(msg.format(version)) + try: + with catch_warnings(record=True): + self.compare(current_packers_data, all_packers_data, + legacy_packer, version) + except ImportError: + # blosc not installed + pass diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index fbe2174e603e2..45cbbd43cd6a8 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -12,7 +12,7 @@ 3. Move the created pickle to "data/legacy_pickle/" directory. """ - +import glob import pytest from warnings import catch_warnings @@ -184,27 +184,25 @@ def compare_sp_frame_float(result, expected, typ, version): tm.assert_sp_frame_equal(result, expected) +files = glob.glob(os.path.join(os.path.dirname(__file__), "data", + "legacy_pickle", "*", "*.pickle")) + + +@pytest.fixture(params=files) +def legacy_pickle(request, datapath): + return datapath(request.param) + + # --------------------- # tests # --------------------- -def legacy_pickle_versions(): - # yield the pickle versions - path = tm.get_data_path('legacy_pickle') - for v in os.listdir(path): - p = os.path.join(path, v) - if os.path.isdir(p): - for f in os.listdir(p): - yield (v, f) - - -@pytest.mark.parametrize('version, f', legacy_pickle_versions()) -def test_pickles(current_pickle_data, version, f): +def test_pickles(current_pickle_data, legacy_pickle): if not is_platform_little_endian(): pytest.skip("known failure on non-little endian") - vf = tm.get_data_path('legacy_pickle/{}/{}'.format(version, f)) + version = os.path.basename(os.path.dirname(legacy_pickle)) with catch_warnings(record=True): - compare(current_pickle_data, vf, version) + compare(current_pickle_data, legacy_pickle, version) def test_round_trip_current(current_pickle_data): @@ -260,12 +258,11 @@ def python_unpickler(path): compare_element(result, expected, typ) -def test_pickle_v0_14_1(): +def test_pickle_v0_14_1(datapath): cat = pd.Categorical(values=['a', 'b', 'c'], ordered=False, categories=['a', 'b', 'c', 'd']) - pickle_path = os.path.join(tm.get_data_path(), - 'categorical_0_14_1.pickle') + pickle_path = datapath('io', 'data', 'categorical_0_14_1.pickle') # This code was executed once on v0.14.1 to generate the pickle: # # cat = Categorical(labels=np.arange(3), levels=['a', 'b', 'c', 'd'], @@ -275,14 +272,13 @@ def test_pickle_v0_14_1(): tm.assert_categorical_equal(cat, pd.read_pickle(pickle_path)) -def test_pickle_v0_15_2(): +def test_pickle_v0_15_2(datapath): # ordered -> _ordered # GH 9347 cat = pd.Categorical(values=['a', 'b', 'c'], ordered=False, categories=['a', 'b', 'c', 'd']) - pickle_path = os.path.join(tm.get_data_path(), - 'categorical_0_15_2.pickle') + pickle_path = datapath('io', 'data', 'categorical_0_15_2.pickle') # This code was executed once on v0.15.2 to generate the pickle: # # cat = Categorical(labels=np.arange(3), levels=['a', 'b', 'c', 'd'], diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 5ac91c15047ff..9cbb62f72f0a0 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ 
-4452,28 +4452,27 @@ def f(): store.select('df') tm.assert_raises_regex(ClosedFileError, 'file is not open', f) - def test_pytables_native_read(self): - + def test_pytables_native_read(self, datapath): with ensure_clean_store( - tm.get_data_path('legacy_hdf/pytables_native.h5'), + datapath('io', 'data', 'legacy_hdf/pytables_native.h5'), mode='r') as store: d2 = store['detector/readout'] assert isinstance(d2, DataFrame) @pytest.mark.skipif(PY35 and is_platform_windows(), reason="native2 read fails oddly on windows / 3.5") - def test_pytables_native2_read(self): + def test_pytables_native2_read(self, datapath): with ensure_clean_store( - tm.get_data_path('legacy_hdf/pytables_native2.h5'), + datapath('io', 'data', 'legacy_hdf', 'pytables_native2.h5'), mode='r') as store: str(store) d1 = store['detector'] assert isinstance(d1, DataFrame) - def test_legacy_table_read(self): + def test_legacy_table_read(self, datapath): # legacy table types with ensure_clean_store( - tm.get_data_path('legacy_hdf/legacy_table.h5'), + datapath('io', 'data', 'legacy_hdf', 'legacy_table.h5'), mode='r') as store: with catch_warnings(record=True): @@ -5120,7 +5119,7 @@ def test_fspath(self): with pd.HDFStore(path) as store: assert os.fspath(store) == str(path) - def test_read_py2_hdf_file_in_py3(self): + def test_read_py2_hdf_file_in_py3(self, datapath): # GH 16781 # tests reading a PeriodIndex DataFrame written in Python2 in Python3 @@ -5135,8 +5134,8 @@ def test_read_py2_hdf_file_in_py3(self): ['2015-01-01', '2015-01-02', '2015-01-05'], freq='B')) with ensure_clean_store( - tm.get_data_path( - 'legacy_hdf/periodindex_0.20.1_x86_64_darwin_2.7.13.h5'), + datapath('io', 'data', 'legacy_hdf', + 'periodindex_0.20.1_x86_64_darwin_2.7.13.h5'), mode='r') as store: result = store['p'] assert_frame_equal(result, expected) @@ -5533,14 +5532,14 @@ def test_store_timezone(self): assert_frame_equal(result, df) - def test_legacy_datetimetz_object(self): + def test_legacy_datetimetz_object(self, datapath): # legacy from < 0.17.0 # 8260 expected = DataFrame(dict(A=Timestamp('20130102', tz='US/Eastern'), B=Timestamp('20130603', tz='CET')), index=range(5)) with ensure_clean_store( - tm.get_data_path('legacy_hdf/datetimetz_object.h5'), + datapath('io', 'data', 'legacy_hdf', 'datetimetz_object.h5'), mode='r') as store: result = store['df'] assert_frame_equal(result, expected) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index f3ab74d37a2bc..f8f742c5980ac 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -22,7 +22,6 @@ import pytest import sqlite3 import csv -import os import warnings import numpy as np @@ -184,9 +183,11 @@ class MixInBase(object): def teardown_method(self, method): - for tbl in self._get_all_tables(): - self.drop_table(tbl) - self._close_conn() + # if setup fails, there may not be a connection to close. 
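+        # The hasattr guard below keeps a failure in setup from cascading
+        # into a second, unrelated AttributeError during teardown.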
+ if hasattr(self, 'conn'): + for tbl in self._get_all_tables(): + self.drop_table(tbl) + self._close_conn() class MySQLMixIn(MixInBase): @@ -253,9 +254,9 @@ def _get_exec(self): else: return self.conn.cursor() - def _load_iris_data(self): + def _load_iris_data(self, datapath): import io - iris_csv_file = os.path.join(tm.get_data_path(), 'iris.csv') + iris_csv_file = datapath('io', 'data', 'iris.csv') self.drop_table('iris') self._get_exec().execute(SQL_STRINGS['create_iris'][self.flavor]) @@ -503,9 +504,10 @@ class _TestSQLApi(PandasSQLTest): flavor = 'sqlite' mode = None - def setup_method(self, method): + @pytest.fixture(autouse=True) + def setup_method(self, datapath): self.conn = self.connect() - self._load_iris_data() + self._load_iris_data(datapath) self._load_iris_view() self._load_test1_data() self._load_test2_data() @@ -1025,8 +1027,9 @@ class _EngineToConnMixin(object): A mixin that causes setup_connect to create a conn rather than an engine. """ - def setup_method(self, method): - super(_EngineToConnMixin, self).setup_method(method) + @pytest.fixture(autouse=True) + def setup_method(self, datapath): + super(_EngineToConnMixin, self).setup_method(datapath) engine = self.conn conn = engine.connect() self.__tx = conn.begin() @@ -1034,12 +1037,14 @@ def setup_method(self, method): self.__engine = engine self.conn = conn - def teardown_method(self, method): + yield + self.__tx.rollback() self.conn.close() self.conn = self.__engine self.pandasSQL = sql.SQLDatabase(self.__engine) - super(_EngineToConnMixin, self).teardown_method(method) + # XXX: + # super(_EngineToConnMixin, self).teardown_method(method) @pytest.mark.single @@ -1136,7 +1141,7 @@ class _TestSQLAlchemy(SQLAlchemyMixIn, PandasSQLTest): """ flavor = None - @classmethod + @pytest.fixture(autouse=True, scope='class') def setup_class(cls): cls.setup_import() cls.setup_driver() @@ -1149,10 +1154,11 @@ def setup_class(cls): msg = "{0} - can't connect to {1} server".format(cls, cls.flavor) pytest.skip(msg) - def setup_method(self, method): + @pytest.fixture(autouse=True) + def setup_method(self, datapath): self.setup_connect() - self._load_iris_data() + self._load_iris_data(datapath) self._load_raw_sql() self._load_test1_data() @@ -1920,11 +1926,12 @@ class TestSQLiteFallback(SQLiteMixIn, PandasSQLTest): def connect(cls): return sqlite3.connect(':memory:') - def setup_method(self, method): + @pytest.fixture(autouse=True) + def setup_method(self, datapath): self.conn = self.connect() self.pandasSQL = sql.SQLiteDatabase(self.conn) - self._load_iris_data() + self._load_iris_data(datapath) self._load_test1_data() @@ -2135,8 +2142,9 @@ def _skip_if_no_pymysql(): @pytest.mark.single class TestXSQLite(SQLiteMixIn): - def setup_method(self, method): - self.method = method + @pytest.fixture(autouse=True) + def setup_method(self, request, datapath): + self.method = request.function self.conn = sqlite3.connect(':memory:') def test_basic(self): @@ -2215,8 +2223,7 @@ def test_execute_fail(self): with pytest.raises(Exception): sql.execute('INSERT INTO test VALUES("foo", "bar", 7)', self.conn) - @tm.capture_stdout - def test_execute_closed_connection(self): + def test_execute_closed_connection(self, request, datapath): create_sql = """ CREATE TABLE test ( @@ -2236,7 +2243,7 @@ def test_execute_closed_connection(self): tquery("select * from test", con=self.conn) # Initialize connection again (needed for tearDown) - self.setup_method(self.method) + self.setup_method(request, datapath) def test_na_roundtrip(self): pass @@ -2341,7 +2348,7 @@ def 
clean_up(test_table_to_drop): "if SQLAlchemy is not installed") class TestXMySQL(MySQLMixIn): - @classmethod + @pytest.fixture(autouse=True, scope='class') def setup_class(cls): _skip_if_no_pymysql() @@ -2370,7 +2377,8 @@ def setup_class(cls): "[pandas] in your system's mysql default file, " "typically located at ~/.my.cnf or /etc/.my.cnf. ") - def setup_method(self, method): + @pytest.fixture(autouse=True) + def setup_method(self, request, datapath): _skip_if_no_pymysql() import pymysql try: @@ -2396,7 +2404,7 @@ def setup_method(self, method): "[pandas] in your system's mysql default file, " "typically located at ~/.my.cnf or /etc/.my.cnf. ") - self.method = method + self.method = request.function def test_basic(self): _skip_if_no_pymysql() @@ -2501,8 +2509,7 @@ def test_execute_fail(self): with pytest.raises(Exception): sql.execute('INSERT INTO test VALUES("foo", "bar", 7)', self.conn) - @tm.capture_stdout - def test_execute_closed_connection(self): + def test_execute_closed_connection(self, request, datapath): _skip_if_no_pymysql() drop_sql = "DROP TABLE IF EXISTS test" create_sql = """ @@ -2525,7 +2532,7 @@ def test_execute_closed_connection(self): tquery("select * from test", con=self.conn) # Initialize connection again (needed for tearDown) - self.setup_method(self.method) + self.setup_method(request, datapath) def test_na_roundtrip(self): _skip_if_no_pymysql() diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index f3a465da4e87f..cff63516f4086 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -25,8 +25,8 @@ @pytest.fixture -def dirpath(): - return tm.get_data_path() +def dirpath(datapath): + return datapath("io", "data") @pytest.fixture @@ -39,8 +39,9 @@ def parsed_114(dirpath): class TestStata(object): - def setup_method(self, method): - self.dirpath = tm.get_data_path() + @pytest.fixture(autouse=True) + def setup_method(self, datapath): + self.dirpath = datapath("io", "data") self.dta1_114 = os.path.join(self.dirpath, 'stata1_114.dta') self.dta1_117 = os.path.join(self.dirpath, 'stata1_117.dta') diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index f65791329f2f1..09687dd97bd43 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -74,11 +74,6 @@ def setup_method(self, method): else: self.default_figsize = (8.0, 6.0) self.default_tick_position = 'left' if self.mpl_ge_2_0_0 else 'default' - # common test data - from pandas import read_csv - base = os.path.join(os.path.dirname(curpath()), os.pardir) - path = os.path.join(base, 'tests', 'data', 'iris.csv') - self.iris = read_csv(path) n = 100 with tm.RNGContext(42): diff --git a/pandas/tests/plotting/test_deprecated.py b/pandas/tests/plotting/test_deprecated.py index 2c2d371921d2f..a45b17ec98261 100644 --- a/pandas/tests/plotting/test_deprecated.py +++ b/pandas/tests/plotting/test_deprecated.py @@ -46,10 +46,9 @@ def test_boxplot_deprecated(self): by='indic') @pytest.mark.slow - def test_radviz_deprecated(self): - df = self.iris + def test_radviz_deprecated(self, iris): with tm.assert_produces_warning(FutureWarning): - plotting.radviz(frame=df, class_column='Name') + plotting.radviz(frame=iris, class_column='Name') @pytest.mark.slow def test_plot_params(self): diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index c82c939584dc7..0473610ea2f8f 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -100,11 +100,11 @@ def 
test_scatter_matrix_axis(self): axes, xlabelsize=8, xrot=90, ylabelsize=8, yrot=0) @pytest.mark.slow - def test_andrews_curves(self): + def test_andrews_curves(self, iris): from pandas.plotting import andrews_curves from matplotlib import cm - df = self.iris + df = iris _check_plot_works(andrews_curves, frame=df, class_column='Name') @@ -165,11 +165,11 @@ def test_andrews_curves(self): andrews_curves(data=df, class_column='Name') @pytest.mark.slow - def test_parallel_coordinates(self): + def test_parallel_coordinates(self, iris): from pandas.plotting import parallel_coordinates from matplotlib import cm - df = self.iris + df = iris ax = _check_plot_works(parallel_coordinates, frame=df, class_column='Name') @@ -234,11 +234,11 @@ def test_parallel_coordinates_with_sorted_labels(self): assert prev[1] < nxt[1] and prev[0] < nxt[0] @pytest.mark.slow - def test_radviz(self): + def test_radviz(self, iris): from pandas.plotting import radviz from matplotlib import cm - df = self.iris + df = iris _check_plot_works(radviz, frame=df, class_column='Name') rgba = ('#556270', '#4ECDC4', '#C7F464') @@ -272,8 +272,8 @@ def test_radviz(self): self._check_colors(handles, facecolors=colors) @pytest.mark.slow - def test_subplot_titles(self): - df = self.iris.drop('Name', axis=1).head() + def test_subplot_titles(self, iris): + df = iris.drop('Name', axis=1).head() # Use the column names as the subplot titles title = list(df.columns) diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index cebbcc41c3e17..59b53cd23010e 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -1,4 +1,3 @@ -import os import pytest import pytz @@ -13,8 +12,8 @@ class TestAsOfMerge(object): - def read_data(self, name, dedupe=False): - path = os.path.join(tm.get_data_path(), name) + def read_data(self, datapath, name, dedupe=False): + path = datapath('reshape', 'merge', 'data', name) x = read_csv(path) if dedupe: x = (x.drop_duplicates(['time', 'ticker'], keep='last') @@ -23,15 +22,17 @@ def read_data(self, name, dedupe=False): x.time = to_datetime(x.time) return x - def setup_method(self, method): + @pytest.fixture(autouse=True) + def setup_method(self, datapath): - self.trades = self.read_data('trades.csv') - self.quotes = self.read_data('quotes.csv', dedupe=True) - self.asof = self.read_data('asof.csv') - self.tolerance = self.read_data('tolerance.csv') - self.allow_exact_matches = self.read_data('allow_exact_matches.csv') + self.trades = self.read_data(datapath, 'trades.csv') + self.quotes = self.read_data(datapath, 'quotes.csv', dedupe=True) + self.asof = self.read_data(datapath, 'asof.csv') + self.tolerance = self.read_data(datapath, 'tolerance.csv') + self.allow_exact_matches = self.read_data(datapath, + 'allow_exact_matches.csv') self.allow_exact_matches_and_tolerance = self.read_data( - 'allow_exact_matches_and_tolerance.csv') + datapath, 'allow_exact_matches_and_tolerance.csv') def test_examples1(self): """ doc-string examples """ @@ -423,11 +424,11 @@ def test_multiby_indexed(self): pd.merge_asof(left, right, left_index=True, right_index=True, left_by=['k1', 'k2'], right_by=['k1']) - def test_basic2(self): + def test_basic2(self, datapath): - expected = self.read_data('asof2.csv') - trades = self.read_data('trades2.csv') - quotes = self.read_data('quotes2.csv', dedupe=True) + expected = self.read_data(datapath, 'asof2.csv') + trades = self.read_data(datapath, 'trades2.csv') + quotes = self.read_data(datapath, 
'quotes2.csv', dedupe=True) result = merge_asof(trades, quotes, on='time', @@ -467,14 +468,14 @@ def test_valid_join_keys(self): merge_asof(trades, quotes, by='ticker') - def test_with_duplicates(self): + def test_with_duplicates(self, datapath): q = pd.concat([self.quotes, self.quotes]).sort_values( ['time', 'ticker']).reset_index(drop=True) result = merge_asof(self.trades, q, on='time', by='ticker') - expected = self.read_data('asof.csv') + expected = self.read_data(datapath, 'asof.csv') assert_frame_equal(result, expected) def test_with_duplicates_no_on(self): diff --git a/pandas/tests/reshape/test_tile.py b/pandas/tests/reshape/test_tile.py index 5ea27f9e34e1c..807fb2530603a 100644 --- a/pandas/tests/reshape/test_tile.py +++ b/pandas/tests/reshape/test_tile.py @@ -282,10 +282,10 @@ def test_round_frac(self): result = tmod._round_frac(0.000123456, precision=2) assert result == 0.00012 - def test_qcut_binning_issues(self): + def test_qcut_binning_issues(self, datapath): # #1978, 1979 - path = os.path.join(tm.get_data_path(), 'cut_data.csv') - arr = np.loadtxt(path) + cut_file = datapath(os.path.join('reshape', 'data', 'cut_data.csv')) + arr = np.loadtxt(cut_file) result = qcut(arr, 20) diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index 0c08d813a7f1b..00701ca2be946 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -1,4 +1,3 @@ -import os from distutils.version import LooseVersion from datetime import date, datetime, timedelta @@ -455,14 +454,15 @@ def test_add(self, offset_types, tz): assert isinstance(result, Timestamp) assert result == expected_localize - def test_pickle_v0_15_2(self): + def test_pickle_v0_15_2(self, datapath): offsets = {'DateOffset': DateOffset(years=1), 'MonthBegin': MonthBegin(1), 'Day': Day(1), 'YearBegin': YearBegin(1), 'Week': Week(1)} - pickle_path = os.path.join(tm.get_data_path(), - 'dateoffset_0_15_2.pickle') + + pickle_path = datapath('tseries', 'offsets', 'data', + 'dateoffset_0_15_2.pickle') # This code was executed once on v0.15.2 to generate the pickle: # with open(pickle_path, 'wb') as f: pickle.dump(offsets, f) # @@ -1854,12 +1854,10 @@ def _check_roundtrip(obj): _check_roundtrip(self.offset2) _check_roundtrip(self.offset * 2) - def test_pickle_compat_0_14_1(self): + def test_pickle_compat_0_14_1(self, datapath): hdays = [datetime(2013, 1, 1) for ele in range(4)] - - pth = tm.get_data_path() - - cday0_14_1 = read_pickle(os.path.join(pth, 'cday-0.14.1.pickle')) + pth = datapath('tseries', 'offsets', 'data', 'cday-0.14.1.pickle') + cday0_14_1 = read_pickle(pth) cday = CDay(holidays=hdays) assert cday == cday0_14_1 diff --git a/pandas/tests/util/test_testing.py b/pandas/tests/util/test_testing.py index ab7c4fb528452..4d34987e14f75 100644 --- a/pandas/tests/util/test_testing.py +++ b/pandas/tests/util/test_testing.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import os import pandas as pd import pytest import numpy as np @@ -841,3 +842,15 @@ def test_locale(self): # GH9744 locales = tm.get_locales() assert len(locales) >= 1 + + +def test_datapath_missing(datapath, request): + if not request.config.getoption("--strict-data-files"): + pytest.skip("Need to set '--strict-data-files'") + + with pytest.raises(ValueError): + datapath('not_a_file') + + result = datapath('data', 'iris.csv') + expected = os.path.join('pandas', 'tests', 'data', 'iris.csv') + assert result == expected diff --git a/pandas/util/_test_decorators.py 
b/pandas/util/_test_decorators.py index 27c24e3a68079..c6ab24403d58d 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -23,7 +23,6 @@ def test_foo(): For more information, refer to the ``pytest`` documentation on ``skipif``. """ - import pytest import locale from distutils.version import LooseVersion diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 6384eca9849f6..b7edbff00a4b9 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -6,7 +6,6 @@ import sys import tempfile import warnings -import inspect import os import subprocess import locale @@ -751,15 +750,6 @@ def ensure_clean(filename=None, return_filelike=False): print("Exception on removing file: {error}".format(error=e)) -def get_data_path(f=''): - """Return the path of a data file, these are relative to the current test - directory. - """ - # get our callers file - _, filename, _, _, _, _ = inspect.getouterframes(inspect.currentframe())[1] - base_dir = os.path.abspath(os.path.dirname(filename)) - return os.path.join(base_dir, 'data', f) - # ----------------------------------------------------------------------------- # Comparators diff --git a/setup.cfg b/setup.cfg index 6d9657737a8bd..9ec967c25e225 100644 --- a/setup.cfg +++ b/setup.cfg @@ -32,4 +32,5 @@ markers = slow: mark a test as slow network: mark a test as network high_memory: mark a test as a high-memory only -doctest_optionflags= NORMALIZE_WHITESPACE IGNORE_EXCEPTION_DETAIL +addopts = --strict-data-files +doctest_optionflags= NORMALIZE_WHITESPACE IGNORE_EXCEPTION_DETAIL \ No newline at end of file diff --git a/setup.py b/setup.py index c5831eb097767..5d6bbbcf7b862 100755 --- a/setup.py +++ b/setup.py @@ -734,11 +734,7 @@ def pxd(name): maintainer=AUTHOR, version=versioneer.get_version(), packages=find_packages(include=['pandas', 'pandas.*']), - package_data={'': ['data/*', 'templates/*', '_libs/*.dll'], - 'pandas.tests.io': ['data/legacy_hdf/*.h5', - 'data/legacy_pickle/*/*.pickle', - 'data/legacy_msgpack/*/*.msgpack', - 'data/html_encoding/*.html']}, + package_data={'': ['templates/*', '_libs/*.dll']}, ext_modules=extensions, maintainer_email=EMAIL, description=DESCRIPTION, From db51f0a57030fd71d26df00c2e3dd63b7fd542b9 Mon Sep 17 00:00:00 2001 From: david-liu-brattle-1 <36486871+david-liu-brattle-1@users.noreply.github.com> Date: Tue, 26 Jun 2018 18:19:41 -0400 Subject: [PATCH 27/55] Cleanup clipboard tests (#21163) (cherry picked from commit 9d38e0ef5842fafcc4e391abc6aba486684e6dc7) --- pandas/tests/io/test_clipboard.py | 196 ++++++++++++++++++++---------- 1 file changed, 129 insertions(+), 67 deletions(-) diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index 98c0effabec84..80fddd50fc9a8 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -9,10 +9,11 @@ from pandas import DataFrame from pandas import read_clipboard from pandas import get_option +from pandas.compat import PY2 from pandas.util import testing as tm from pandas.util.testing import makeCustomDataframe as mkdf from pandas.io.clipboard.exceptions import PyperclipException -from pandas.io.clipboard import clipboard_set +from pandas.io.clipboard import clipboard_set, clipboard_get try: @@ -22,73 +23,134 @@ _DEPS_INSTALLED = 0 +def build_kwargs(sep, excel): + kwargs = {} + if excel != 'default': + kwargs['excel'] = excel + if sep != 'default': + kwargs['sep'] = sep + return kwargs + + +@pytest.fixture(params=['delims', 'utf8', 'string', 'long', 'nonascii', + 'colwidth', 
'mixed', 'float', 'int']) +def df(request): + data_type = request.param + + if data_type == 'delims': + return pd.DataFrame({'a': ['"a,\t"b|c', 'd\tef´'], + 'b': ['hi\'j', 'k\'\'lm']}) + elif data_type == 'utf8': + return pd.DataFrame({'a': ['µasd', 'Ωœ∑´'], + 'b': ['øπ∆˚¬', 'œ∑´®']}) + elif data_type == 'string': + return mkdf(5, 3, c_idx_type='s', r_idx_type='i', + c_idx_names=[None], r_idx_names=[None]) + elif data_type == 'long': + max_rows = get_option('display.max_rows') + return mkdf(max_rows + 1, 3, + data_gen_f=lambda *args: randint(2), + c_idx_type='s', r_idx_type='i', + c_idx_names=[None], r_idx_names=[None]) + elif data_type == 'nonascii': + return pd.DataFrame({'en': 'in English'.split(), + 'es': 'en español'.split()}) + elif data_type == 'colwidth': + _cw = get_option('display.max_colwidth') + 1 + return mkdf(5, 3, data_gen_f=lambda *args: 'x' * _cw, + c_idx_type='s', r_idx_type='i', + c_idx_names=[None], r_idx_names=[None]) + elif data_type == 'mixed': + return DataFrame({'a': np.arange(1.0, 6.0) + 0.01, + 'b': np.arange(1, 6), + 'c': list('abcde')}) + elif data_type == 'float': + return mkdf(5, 3, data_gen_f=lambda r, c: float(r) + 0.01, + c_idx_type='s', r_idx_type='i', + c_idx_names=[None], r_idx_names=[None]) + elif data_type == 'int': + return mkdf(5, 3, data_gen_f=lambda *args: randint(2), + c_idx_type='s', r_idx_type='i', + c_idx_names=[None], r_idx_names=[None]) + else: + raise ValueError + + @pytest.mark.single @pytest.mark.skipif(not _DEPS_INSTALLED, reason="clipboard primitives not installed") class TestClipboard(object): - - @classmethod - def setup_class(cls): - cls.data = {} - cls.data['string'] = mkdf(5, 3, c_idx_type='s', r_idx_type='i', - c_idx_names=[None], r_idx_names=[None]) - cls.data['int'] = mkdf(5, 3, data_gen_f=lambda *args: randint(2), - c_idx_type='s', r_idx_type='i', - c_idx_names=[None], r_idx_names=[None]) - cls.data['float'] = mkdf(5, 3, - data_gen_f=lambda r, c: float(r) + 0.01, - c_idx_type='s', r_idx_type='i', - c_idx_names=[None], r_idx_names=[None]) - cls.data['mixed'] = DataFrame({'a': np.arange(1.0, 6.0) + 0.01, - 'b': np.arange(1, 6), - 'c': list('abcde')}) - - # Test columns exceeding "max_colwidth" (GH8305) - _cw = get_option('display.max_colwidth') + 1 - cls.data['colwidth'] = mkdf(5, 3, data_gen_f=lambda *args: 'x' * _cw, - c_idx_type='s', r_idx_type='i', - c_idx_names=[None], r_idx_names=[None]) - # Test GH-5346 - max_rows = get_option('display.max_rows') - cls.data['longdf'] = mkdf(max_rows + 1, 3, - data_gen_f=lambda *args: randint(2), - c_idx_type='s', r_idx_type='i', - c_idx_names=[None], r_idx_names=[None]) - # Test for non-ascii text: GH9263 - cls.data['nonascii'] = pd.DataFrame({'en': 'in English'.split(), - 'es': 'en español'.split()}) - # unicode round trip test for GH 13747, GH 12529 - cls.data['utf8'] = pd.DataFrame({'a': ['µasd', 'Ωœ∑´'], - 'b': ['øπ∆˚¬', 'œ∑´®']}) - cls.data_types = list(cls.data.keys()) - - @classmethod - def teardown_class(cls): - del cls.data_types, cls.data - - def check_round_trip_frame(self, data_type, excel=None, sep=None, + def check_round_trip_frame(self, data, excel=None, sep=None, encoding=None): - data = self.data[data_type] data.to_clipboard(excel=excel, sep=sep, encoding=encoding) - if sep is not None: - result = read_clipboard(sep=sep, index_col=0, encoding=encoding) - else: - result = read_clipboard(encoding=encoding) + result = read_clipboard(sep=sep or '\t', index_col=0, + encoding=encoding) tm.assert_frame_equal(data, result, check_dtype=False) - def test_round_trip_frame_sep(self): 
- for dt in self.data_types: - self.check_round_trip_frame(dt, sep=',') - self.check_round_trip_frame(dt, sep=r'\s+') - self.check_round_trip_frame(dt, sep='|') - - def test_round_trip_frame_string(self): - for dt in self.data_types: - self.check_round_trip_frame(dt, excel=False) - - def test_round_trip_frame(self): - for dt in self.data_types: - self.check_round_trip_frame(dt) + # Test that default arguments copy as tab delimited + @pytest.mark.xfail(reason='to_clipboard defaults to space delim. ' + 'Issue in #21104, Fixed in #21111') + def test_round_trip_frame(self, df): + self.check_round_trip_frame(df) + + # Test that explicit delimiters are respected + @pytest.mark.parametrize('sep', ['\t', ',', '|']) + def test_round_trip_frame_sep(self, df, sep): + self.check_round_trip_frame(df, sep=sep) + + # Test white space separator + @pytest.mark.xfail(reason="Fails on 'delims' df because quote escapes " + "aren't handled correctly in default c engine. Fixed " + "in #21111 by defaulting to python engine for " + "whitespace separator") + def test_round_trip_frame_string(self, df): + df.to_clipboard(excel=False, sep=None) + result = read_clipboard() + assert df.to_string() == result.to_string() + assert df.shape == result.shape + + # Two character separator is not supported in to_clipboard + # Test that multi-character separators are not silently passed + @pytest.mark.xfail(reason="Not yet implemented. Fixed in #21111") + def test_excel_sep_warning(self, df): + with tm.assert_produces_warning(): + df.to_clipboard(excel=True, sep=r'\t') + + # Separator is ignored when excel=False and should produce a warning + @pytest.mark.xfail(reason="Not yet implemented. Fixed in #21111") + def test_copy_delim_warning(self, df): + with tm.assert_produces_warning(): + df.to_clipboard(excel=False, sep='\t') + + # Tests that the default behavior of to_clipboard is tab + # delimited and excel="True" + @pytest.mark.xfail(reason="to_clipboard defaults to space delim. Issue in " + "#21104, Fixed in #21111") + @pytest.mark.parametrize('sep', ['\t', None, 'default']) + @pytest.mark.parametrize('excel', [True, None, 'default']) + def test_clipboard_copy_tabs_default(self, sep, excel, df): + kwargs = build_kwargs(sep, excel) + df.to_clipboard(**kwargs) + if PY2: + # to_clipboard copies unicode, to_csv produces bytes. This is + # expected behavior + assert clipboard_get().encode('utf-8') == df.to_csv(sep='\t') + else: + assert clipboard_get() == df.to_csv(sep='\t') + + # Tests reading of white space separated tables + @pytest.mark.xfail(reason="Fails on 'delims' df because quote escapes " + "aren't handled correctly. in default c engine. 
Fixed " + "in #21111 by defaulting to python engine for " + "whitespace separator") + @pytest.mark.parametrize('sep', [None, 'default']) + @pytest.mark.parametrize('excel', [False]) + def test_clipboard_copy_strings(self, sep, excel, df): + kwargs = build_kwargs(sep, excel) + df.to_clipboard(**kwargs) + result = read_clipboard(sep=r'\s+') + assert result.to_string() == df.to_string() + assert df.shape == result.shape def test_read_clipboard_infer_excel(self): # gh-19010: avoid warnings @@ -124,15 +186,15 @@ def test_read_clipboard_infer_excel(self): tm.assert_frame_equal(res, exp) - def test_invalid_encoding(self): + def test_invalid_encoding(self, df): # test case for testing invalid encoding - data = self.data['string'] with pytest.raises(ValueError): - data.to_clipboard(encoding='ascii') + df.to_clipboard(encoding='ascii') with pytest.raises(NotImplementedError): pd.read_clipboard(encoding='ascii') - def test_round_trip_valid_encodings(self): - for enc in ['UTF-8', 'utf-8', 'utf8']: - for dt in self.data_types: - self.check_round_trip_frame(dt, encoding=enc) + @pytest.mark.xfail(reason='to_clipboard defaults to space delim. ' + 'Issue in #21104, Fixed in #21111') + @pytest.mark.parametrize('enc', ['UTF-8', 'utf-8', 'utf8']) + def test_round_trip_valid_encodings(self, enc, df): + self.check_round_trip_frame(df, encoding=enc) From d9ada974d0f73c72953fcece56e084dc277bc4c7 Mon Sep 17 00:00:00 2001 From: Jeremy Schendel Date: Wed, 27 Jun 2018 03:57:55 -0600 Subject: [PATCH 28/55] DOC: Fix versionadded directive typos in IntervalIndex (#21649) (cherry picked from commit b35cb1c127aae894c2a1ee5ab2f16987b91e9000) --- pandas/core/indexes/interval.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index eb9d7efc06c27..23a655b9a51ee 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -160,7 +160,7 @@ class IntervalIndex(IntervalMixin, Index): dtype : dtype or None, default None If None, dtype will be inferred - ..versionadded:: 0.23.0 + .. versionadded:: 0.23.0 Attributes ---------- @@ -438,7 +438,7 @@ def from_breaks(cls, breaks, closed='right', name=None, copy=False, dtype : dtype or None, default None If None, dtype will be inferred - ..versionadded:: 0.23.0 + .. versionadded:: 0.23.0 Examples -------- @@ -568,7 +568,7 @@ def from_intervals(cls, data, closed=None, name=None, copy=False, dtype : dtype or None, default None If None, dtype will be inferred - ..versionadded:: 0.23.0 + .. versionadded:: 0.23.0 Examples -------- @@ -619,7 +619,7 @@ def from_tuples(cls, data, closed='right', name=None, copy=False, dtype : dtype or None, default None If None, dtype will be inferred - ..versionadded:: 0.23.0 + .. versionadded:: 0.23.0 Examples -------- @@ -671,7 +671,7 @@ def to_tuples(self, na_tuple=True): Returns NA as a tuple if True, ``(nan, nan)``, or just as the NA value itself if False, ``nan``. - ..versionadded:: 0.23.0 + .. 
versionadded:: 0.23.0 Examples -------- From 0a42f18687a1e586b09bfaa18b0ddc85e20d760a Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Fri, 29 Jun 2018 01:26:38 +0100 Subject: [PATCH 29/55] Fix Timestamp rounding (#21507) (cherry picked from commit 76ef7c459e752f72abc62e030fd1cea0117c1dca) --- doc/source/whatsnew/v0.23.2.txt | 2 +- pandas/_libs/tslibs/timestamps.pyx | 34 +++++++++++++------ .../indexes/datetimes/test_scalar_compat.py | 19 +++++++++++ .../tests/scalar/timestamp/test_unary_ops.py | 20 ++++++++++- 4 files changed, 62 insertions(+), 13 deletions(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index b3da4d1c4e288..9d96e807dfd3e 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -54,7 +54,7 @@ Fixed Regressions - Fixed regression in :meth:`to_csv` when handling file-like object incorrectly (:issue:`21471`) - Bug in both :meth:`DataFrame.first_valid_index` and :meth:`Series.first_valid_index` raised for a row index having duplicate values (:issue:`21441`) -- +- Bug in :meth:`Timestamp.ceil` and :meth:`Timestamp.floor` when timestamp is a multiple of the rounding frequency (:issue:`21262`) .. _whatsnew_0232.performance: diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index ba5ebdab82ddc..123ccebf83a56 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -59,42 +59,51 @@ cdef inline object create_timestamp_from_ts(int64_t value, def round_ns(values, rounder, freq): + """ Applies rounding function at given frequency Parameters ---------- - values : int, :obj:`ndarray` - rounder : function + values : :obj:`ndarray` + rounder : function, eg. 'ceil', 'floor', 'round' freq : str, obj Returns ------- - int or :obj:`ndarray` + :obj:`ndarray` """ + from pandas.tseries.frequencies import to_offset unit = to_offset(freq).nanos + + # GH21262 If the Timestamp is multiple of the freq str + # don't apply any rounding + mask = values % unit == 0 + if mask.all(): + return values + r = values.copy() + if unit < 1000: # for nano rounding, work with the last 6 digits separately # due to float precision buff = 1000000 - r = (buff * (values // buff) + unit * - (rounder((values % buff) * (1 / float(unit)))).astype('i8')) + r[~mask] = (buff * (values[~mask] // buff) + + unit * (rounder((values[~mask] % buff) * + (1 / float(unit)))).astype('i8')) else: if unit % 1000 != 0: msg = 'Precision will be lost using frequency: {}' warnings.warn(msg.format(freq)) - # GH19206 # to deal with round-off when unit is large if unit >= 1e9: divisor = 10 ** int(np.log10(unit / 1e7)) else: divisor = 10 - - r = (unit * rounder((values * (divisor / float(unit))) / divisor) - .astype('i8')) - + r[~mask] = (unit * rounder((values[~mask] * + (divisor / float(unit))) / divisor) + .astype('i8')) return r @@ -649,7 +658,10 @@ class Timestamp(_Timestamp): else: value = self.value - r = round_ns(value, rounder, freq) + value = np.array([value], dtype=np.int64) + + # Will only ever contain 1 element for timestamp + r = round_ns(value, rounder, freq)[0] result = Timestamp(r, unit='ns') if self.tz is not None: result = result.tz_localize(self.tz) diff --git a/pandas/tests/indexes/datetimes/test_scalar_compat.py b/pandas/tests/indexes/datetimes/test_scalar_compat.py index 9180bb0af3af3..801dcb91b124e 100644 --- a/pandas/tests/indexes/datetimes/test_scalar_compat.py +++ b/pandas/tests/indexes/datetimes/test_scalar_compat.py @@ -134,6 +134,21 @@ def test_round(self, tz): ts = '2016-10-17 
12:00:00.001501031'
         DatetimeIndex([ts]).round('1010ns')

+    def test_no_rounding_occurs(self, tz):
+        # GH 21262
+        rng = date_range(start='2016-01-01', periods=5,
+                         freq='2Min', tz=tz)
+
+        expected_rng = DatetimeIndex([
+            Timestamp('2016-01-01 00:00:00', tz=tz, freq='2T'),
+            Timestamp('2016-01-01 00:02:00', tz=tz, freq='2T'),
+            Timestamp('2016-01-01 00:04:00', tz=tz, freq='2T'),
+            Timestamp('2016-01-01 00:06:00', tz=tz, freq='2T'),
+            Timestamp('2016-01-01 00:08:00', tz=tz, freq='2T'),
+        ])
+
+        tm.assert_index_equal(rng.round(freq='2T'), expected_rng)
+
     @pytest.mark.parametrize('test_input, rounder, freq, expected', [
         (['2117-01-01 00:00:45'], 'floor', '15s', ['2117-01-01 00:00:45']),
         (['2117-01-01 00:00:45'], 'ceil', '15s', ['2117-01-01 00:00:45']),
@@ -143,6 +158,10 @@ def test_round(self, tz):
             ['1823-01-01 00:00:01.000000020']),
         (['1823-01-01 00:00:01'], 'floor', '1s', ['1823-01-01 00:00:01']),
         (['1823-01-01 00:00:01'], 'ceil', '1s', ['1823-01-01 00:00:01']),
+        (['2018-01-01 00:15:00'], 'ceil', '15T', ['2018-01-01 00:15:00']),
+        (['2018-01-01 00:15:00'], 'floor', '15T', ['2018-01-01 00:15:00']),
+        (['1823-01-01 03:00:00'], 'ceil', '3H', ['1823-01-01 03:00:00']),
+        (['1823-01-01 03:00:00'], 'floor', '3H', ['1823-01-01 03:00:00']),
         (('NaT', '1823-01-01 00:00:01'), 'floor', '1s',
          ('NaT', '1823-01-01 00:00:01')),
         (('NaT', '1823-01-01 00:00:01'), 'ceil', '1s',
diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py
index aecddab8477fc..dbe31ccb11114 100644
--- a/pandas/tests/scalar/timestamp/test_unary_ops.py
+++ b/pandas/tests/scalar/timestamp/test_unary_ops.py
@@ -118,6 +118,25 @@ def test_ceil_floor_edge(self, test_input, rounder, freq, expected):
         expected = Timestamp(expected)
         assert result == expected

+    @pytest.mark.parametrize('test_input, freq, expected', [
+        ('2018-01-01 00:02:06', '2s', '2018-01-01 00:02:06'),
+        ('2018-01-01 00:02:00', '2T', '2018-01-01 00:02:00'),
+        ('2018-01-01 00:04:00', '4T', '2018-01-01 00:04:00'),
+        ('2018-01-01 00:15:00', '15T', '2018-01-01 00:15:00'),
+        ('2018-01-01 00:20:00', '20T', '2018-01-01 00:20:00'),
+        ('2018-01-01 03:00:00', '3H', '2018-01-01 03:00:00'),
+    ])
+    @pytest.mark.parametrize('rounder', ['ceil', 'floor', 'round'])
+    def test_round_minute_freq(self, test_input, freq, expected, rounder):
+        # Ensure timestamps that shouldn't round don't!
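# ----------------------------------------------------------------------
# Illustrative sketch (editorial, not part of the patch), assuming a
# pandas build that includes the GH 21262 fix: a Timestamp that is
# already an exact multiple of the rounding frequency must come back
# unchanged from floor/ceil/round instead of jumping to the next
# multiple.
import pandas as pd

ts = pd.Timestamp('2018-01-01 00:02:00')
# 00:02:00 is an exact multiple of 2 minutes, so nothing should move.
assert ts.floor('2T') == ts
assert ts.ceil('2T') == ts
assert ts.round('2T') == ts
# ----------------------------------------------------------------------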
+ # GH#21262 + + dt = Timestamp(test_input) + expected = Timestamp(expected) + func = getattr(dt, rounder) + result = func(freq) + assert result == expected + def test_ceil(self): dt = Timestamp('20130101 09:10:11') result = dt.ceil('D') @@ -257,7 +276,6 @@ def test_timestamp(self): if PY3: # datetime.timestamp() converts in the local timezone with tm.set_timezone('UTC'): - # should agree with datetime.timestamp method dt = ts.to_pydatetime() assert dt.timestamp() == ts.timestamp() From 2c00914e9addaa57d6b9f3308f25b5755e4dcc1a Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 29 Jun 2018 02:38:39 +0200 Subject: [PATCH 30/55] API/REGR: (re-)allow neg/pos unary operation on object dtype (#21590) (cherry picked from commit 8cb6be0eced3bd3742efd0c03b2d903e3513cb11) --- doc/source/whatsnew/v0.23.2.txt | 1 + pandas/core/generic.py | 7 +++++-- pandas/tests/frame/test_operators.py | 21 +++++++++++++++++++++ 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index 9d96e807dfd3e..07ce99d4f19aa 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -54,6 +54,7 @@ Fixed Regressions - Fixed regression in :meth:`to_csv` when handling file-like object incorrectly (:issue:`21471`) - Bug in both :meth:`DataFrame.first_valid_index` and :meth:`Series.first_valid_index` raised for a row index having duplicate values (:issue:`21441`) +- Fixed regression in unary negative operations with object dtype (:issue:`21380`) - Bug in :meth:`Timestamp.ceil` and :meth:`Timestamp.floor` when timestamp is a multiple of the rounding frequency (:issue:`21262`) .. _whatsnew_0232.performance: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 50a5c10a6865f..02462218e8b02 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -27,6 +27,7 @@ is_dict_like, is_re_compilable, is_period_arraylike, + is_object_dtype, pandas_dtype) from pandas.core.dtypes.cast import maybe_promote, maybe_upcast_putmask from pandas.core.dtypes.inference import is_hashable @@ -1117,7 +1118,8 @@ def __neg__(self): values = com._values_from_object(self) if is_bool_dtype(values): arr = operator.inv(values) - elif (is_numeric_dtype(values) or is_timedelta64_dtype(values)): + elif (is_numeric_dtype(values) or is_timedelta64_dtype(values) + or is_object_dtype(values)): arr = operator.neg(values) else: raise TypeError("Unary negative expects numeric dtype, not {}" @@ -1128,7 +1130,8 @@ def __pos__(self): values = com._values_from_object(self) if (is_bool_dtype(values) or is_period_arraylike(values)): arr = values - elif (is_numeric_dtype(values) or is_timedelta64_dtype(values)): + elif (is_numeric_dtype(values) or is_timedelta64_dtype(values) + or is_object_dtype(values)): arr = operator.pos(values) else: raise TypeError("Unary plus expects numeric dtype, not {}" diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 5df50f3d7835b..fdf50805ad818 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -3,6 +3,7 @@ from __future__ import print_function from collections import deque from datetime import datetime +from decimal import Decimal import operator import pytest @@ -282,6 +283,17 @@ def test_neg_numeric(self, df, expected): assert_frame_equal(-df, expected) assert_series_equal(-df['a'], expected['a']) + @pytest.mark.parametrize('df, expected', [ + (np.array([1, 2], dtype=object), np.array([-1, -2], dtype=object)), + 
([Decimal('1.0'), Decimal('2.0')], [Decimal('-1.0'), Decimal('-2.0')]), + ]) + def test_neg_object(self, df, expected): + # GH 21380 + df = pd.DataFrame({'a': df}) + expected = pd.DataFrame({'a': expected}) + assert_frame_equal(-df, expected) + assert_series_equal(-df['a'], expected['a']) + @pytest.mark.parametrize('df', [ pd.DataFrame({'a': ['a', 'b']}), pd.DataFrame({'a': pd.to_datetime(['2017-01-22', '1970-01-01'])}), @@ -307,6 +319,15 @@ def test_pos_numeric(self, df): @pytest.mark.parametrize('df', [ pd.DataFrame({'a': ['a', 'b']}), + pd.DataFrame({'a': np.array([-1, 2], dtype=object)}), + pd.DataFrame({'a': [Decimal('-1.0'), Decimal('2.0')]}), + ]) + def test_pos_object(self, df): + # GH 21380 + assert_frame_equal(+df, df) + assert_series_equal(+df['a'], df['a']) + + @pytest.mark.parametrize('df', [ pd.DataFrame({'a': pd.to_datetime(['2017-01-22', '1970-01-01'])}), ]) def test_pos_raises(self, df): From dddc81b7fb6d938ad96f40b6953e6db729c96da4 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 29 Jun 2018 02:39:45 +0200 Subject: [PATCH 31/55] API: re-allow duplicate index level names (#21423) (cherry picked from commit 66b517c2f51ed20d4c6823272d5c2a0f47f96d84) --- doc/source/whatsnew/v0.23.2.txt | 1 + pandas/core/indexes/multi.py | 19 +++++------- pandas/core/reshape/reshape.py | 12 ++++++++ pandas/tests/frame/test_alter_axes.py | 37 +++++++++++++++++++----- pandas/tests/frame/test_reshape.py | 10 +++++++ pandas/tests/groupby/test_categorical.py | 8 ++--- pandas/tests/indexes/test_multi.py | 25 +++++++++------- pandas/tests/io/test_pytables.py | 6 ++++ pandas/tests/reshape/test_pivot.py | 10 +++++-- 9 files changed, 90 insertions(+), 38 deletions(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index 07ce99d4f19aa..ab9c3bc3857d6 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -53,6 +53,7 @@ Fixed Regressions ~~~~~~~~~~~~~~~~~ - Fixed regression in :meth:`to_csv` when handling file-like object incorrectly (:issue:`21471`) +- Re-allowed duplicate level names of a ``MultiIndex``. Accessing a level that has a duplicate name by name still raises an error (:issue:`19029`). 
- Bug in both :meth:`DataFrame.first_valid_index` and :meth:`Series.first_valid_index` raised for a row index having duplicate values (:issue:`21441`) - Fixed regression in unary negative operations with object dtype (:issue:`21380`) - Bug in :meth:`Timestamp.ceil` and :meth:`Timestamp.floor` when timestamp is a multiple of the rounding frequency (:issue:`21262`) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 80bf73cfe7dd3..33db32cfe1166 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -672,30 +672,18 @@ def _set_names(self, names, level=None, validate=True): if level is None: level = range(self.nlevels) - used = {} else: level = [self._get_level_number(l) for l in level] - used = {self.levels[l].name: l - for l in set(range(self.nlevels)) - set(level)} # set the name for l, name in zip(level, names): if name is not None: - # GH 20527 # All items in 'names' need to be hashable: if not is_hashable(name): raise TypeError('{}.name must be a hashable type' .format(self.__class__.__name__)) - - if name in used: - raise ValueError( - 'Duplicated level name: "{}", assigned to ' - 'level {}, is already used for level ' - '{}.'.format(name, l, used[name])) - self.levels[l].rename(name, inplace=True) - used[name] = l names = property(fset=_set_names, fget=_get_names, doc="Names of levels in MultiIndex") @@ -2935,6 +2923,13 @@ def isin(self, values, level=None): else: return np.lib.arraysetops.in1d(labs, sought_labels) + def _reference_duplicate_name(self, name): + """ + Returns True if the name refered to in self.names is duplicated. + """ + # count the times name equals an element in self.names. + return sum(name == n for n in self.names) > 1 + MultiIndex._add_numeric_methods_disabled() MultiIndex._add_numeric_methods_add_sub_disabled() diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 2757e0797a410..3d9e84954a63b 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -115,6 +115,12 @@ def __init__(self, values, index, level=-1, value_columns=None, self.index = index.remove_unused_levels() + if isinstance(self.index, MultiIndex): + if index._reference_duplicate_name(level): + msg = ("Ambiguous reference to {level}. The index " + "names are not unique.".format(level=level)) + raise ValueError(msg) + self.level = self.index._get_level_number(level) # when index includes `nan`, need to lift levels/strides by 1 @@ -528,6 +534,12 @@ def factorize(index): N, K = frame.shape + if isinstance(frame.columns, MultiIndex): + if frame.columns._reference_duplicate_name(level): + msg = ("Ambiguous reference to {level}. The column " + "names are not unique.".format(level=level)) + raise ValueError(msg) + # Will also convert negative level numbers and check if out of bounds. 
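# ----------------------------------------------------------------------
# Illustrative sketch (editorial, not part of the patch), assuming a
# pandas build that includes this change: duplicate MultiIndex level
# names are allowed again (GH 19029), so the _reference_duplicate_name()
# checks added in this patch turn an ambiguous unstack/stack by a
# duplicated name into a clear ValueError instead of silently picking a
# level.
import pandas as pd

idx = pd.MultiIndex.from_tuples([('a', 'b'), ('c', 'd')],
                                names=['c1', 'c1'])
df = pd.DataFrame([1, 2], index=idx)
try:
    df.unstack('c1')  # two levels are named 'c1', so this is ambiguous
except ValueError as exc:
    print(exc)  # "Ambiguous reference to c1. The index names are not unique."
# ----------------------------------------------------------------------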
level_num = frame.columns._get_level_number(level) diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 164d6746edec0..21961906c39bb 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -130,19 +130,27 @@ def test_set_index2(self): result = df.set_index(df.C) assert result.index.name == 'C' - @pytest.mark.parametrize('level', ['a', pd.Series(range(3), name='a')]) + @pytest.mark.parametrize( + 'level', ['a', pd.Series(range(0, 8, 2), name='a')]) def test_set_index_duplicate_names(self, level): - # GH18872 + # GH18872 - GH19029 df = pd.DataFrame(np.arange(8).reshape(4, 2), columns=['a', 'b']) # Pass an existing level name: df.index.name = 'a' - pytest.raises(ValueError, df.set_index, level, append=True) - pytest.raises(ValueError, df.set_index, [level], append=True) - - # Pass twice the same level name: - df.index.name = 'c' - pytest.raises(ValueError, df.set_index, [level, level]) + expected = pd.MultiIndex.from_tuples([(0, 0), (1, 2), (2, 4), (3, 6)], + names=['a', 'a']) + result = df.set_index(level, append=True) + tm.assert_index_equal(result.index, expected) + result = df.set_index([level], append=True) + tm.assert_index_equal(result.index, expected) + + # Pass twice the same level name (only works with passing actual data) + if isinstance(level, pd.Series): + result = df.set_index([level, level]) + expected = pd.MultiIndex.from_tuples( + [(0, 0), (2, 2), (4, 4), (6, 6)], names=['a', 'a']) + tm.assert_index_equal(result.index, expected) def test_set_index_nonuniq(self): df = DataFrame({'A': ['foo', 'foo', 'foo', 'bar', 'bar'], @@ -617,6 +625,19 @@ def test_reorder_levels(self): index=e_idx) assert_frame_equal(result, expected) + result = df.reorder_levels([0, 0, 0]) + e_idx = MultiIndex(levels=[['bar'], ['bar'], ['bar']], + labels=[[0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0]], + names=['L0', 'L0', 'L0']) + expected = DataFrame({'A': np.arange(6), 'B': np.arange(6)}, + index=e_idx) + assert_frame_equal(result, expected) + + result = df.reorder_levels(['L0', 'L0', 'L0']) + assert_frame_equal(result, expected) + def test_reset_index(self): stacked = self.frame.stack()[::2] stacked = DataFrame({'foo': stacked, 'bar': stacked}) diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index d05321abefca6..ebf6c5e37b916 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -560,6 +560,16 @@ def test_unstack_dtypes(self): assert left.shape == (3, 2) tm.assert_frame_equal(left, right) + def test_unstack_non_unique_index_names(self): + idx = MultiIndex.from_tuples([('a', 'b'), ('c', 'd')], + names=['c1', 'c1']) + df = DataFrame([1, 2], index=idx) + with pytest.raises(ValueError): + df.unstack('c1') + + with pytest.raises(ValueError): + df.T.stack('c1') + def test_unstack_unused_levels(self): # GH 17845: unused labels in index make unstack() cast int to float idx = pd.MultiIndex.from_product([['a'], ['A', 'B', 'C', 'D']])[:-1] diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 0fec6a8f96a24..cb76195eacf40 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -555,15 +555,11 @@ def test_as_index(): columns=['cat', 'A', 'B']) tm.assert_frame_equal(result, expected) - # another not in-axis grouper - s = Series(['a', 'b', 'b'], name='cat2') + # another not in-axis grouper (conflicting names in index) + s = Series(['a', 'b', 'b'], 
name='cat') result = df.groupby(['cat', s], as_index=False, observed=True).sum() tm.assert_frame_equal(result, expected) - # GH18872: conflicting names in desired index - with pytest.raises(ValueError): - df.groupby(['cat', s.rename('cat')], observed=True).sum() - # is original index dropped? group_columns = ['cat', 'A'] expected = DataFrame( diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 3ede83b5969ce..40e64d99ac440 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -655,22 +655,27 @@ def test_constructor_nonhashable_names(self): # With .set_names() tm.assert_raises_regex(TypeError, message, mi.set_names, names=renamed) - @pytest.mark.parametrize('names', [['a', 'b', 'a'], ['1', '1', '2'], - ['1', 'a', '1']]) + @pytest.mark.parametrize('names', [['a', 'b', 'a'], [1, 1, 2], + [1, 'a', 1]]) def test_duplicate_level_names(self, names): - # GH18872 - pytest.raises(ValueError, pd.MultiIndex.from_product, - [[0, 1]] * 3, names=names) + # GH18872, GH19029 + mi = pd.MultiIndex.from_product([[0, 1]] * 3, names=names) + assert mi.names == names # With .rename() mi = pd.MultiIndex.from_product([[0, 1]] * 3) - tm.assert_raises_regex(ValueError, "Duplicated level name:", - mi.rename, names) + mi = mi.rename(names) + assert mi.names == names # With .rename(., level=) - mi.rename(names[0], level=1, inplace=True) - tm.assert_raises_regex(ValueError, "Duplicated level name:", - mi.rename, names[:2], level=[0, 2]) + mi.rename(names[1], level=1, inplace=True) + mi = mi.rename([names[0], names[2]], level=[0, 2]) + assert mi.names == names + + def test_duplicate_level_names_access_raises(self): + self.index.names = ['foo', 'foo'] + tm.assert_raises_regex(KeyError, 'Level foo not found', + self.index._get_level_number, 'foo') def assert_multiindex_copied(self, copy, original): # Levels should be (at least, shallow copied) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 9cbb62f72f0a0..7dafc9603f96d 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -1842,6 +1842,12 @@ def make_index(names=None): 'a', 'b'], index=make_index(['date', 'a', 't'])) pytest.raises(ValueError, store.append, 'df', df) + # dup within level + _maybe_remove(store, 'df') + df = DataFrame(np.zeros((12, 2)), columns=['a', 'b'], + index=make_index(['date', 'date', 'date'])) + pytest.raises(ValueError, store.append, 'df', df) + # fully names _maybe_remove(store, 'df') df = DataFrame(np.zeros((12, 2)), columns=[ diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 3ec60d50f2792..b71954163f9e1 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1729,9 +1729,15 @@ def test_crosstab_with_numpy_size(self): tm.assert_frame_equal(result, expected) def test_crosstab_dup_index_names(self): - # GH 13279, GH 18872 + # GH 13279 s = pd.Series(range(3), name='foo') - pytest.raises(ValueError, pd.crosstab, s, s) + + result = pd.crosstab(s, s) + expected_index = pd.Index(range(3), name='foo') + expected = pd.DataFrame(np.eye(3, dtype=np.int64), + index=expected_index, + columns=expected_index) + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("names", [['a', ('b', 'c')], [('a', 'b'), 'c']]) From 06d76e0c6dc0008510e7381cb774f183b8e8271b Mon Sep 17 00:00:00 2001 From: david-liu-brattle-1 <36486871+david-liu-brattle-1@users.noreply.github.com> Date: Fri, 29 Jun 2018 08:22:15 -0400 Subject: [PATCH 32/55] BUG: 
to_clipboard fails to format output for Excel (#21111) (cherry picked from commit dc45fbafef172e357cb5decdeab22de67160f5b7) --- doc/source/whatsnew/v0.23.2.txt | 1 + pandas/io/clipboards.py | 32 +++++++++++++++++++++++++------ pandas/tests/io/test_clipboard.py | 16 ---------------- 3 files changed, 27 insertions(+), 22 deletions(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index ab9c3bc3857d6..608db7487c1e4 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -57,6 +57,7 @@ Fixed Regressions - Bug in both :meth:`DataFrame.first_valid_index` and :meth:`Series.first_valid_index` raised for a row index having duplicate values (:issue:`21441`) - Fixed regression in unary negative operations with object dtype (:issue:`21380`) - Bug in :meth:`Timestamp.ceil` and :meth:`Timestamp.floor` when timestamp is a multiple of the rounding frequency (:issue:`21262`) +- Fixed regression in :func:`to_clipboard` that defaulted to copying dataframes with space delimited instead of tab delimited (:issue:`21104`) .. _whatsnew_0232.performance: diff --git a/pandas/io/clipboards.py b/pandas/io/clipboards.py index dcc221ce978b3..b3f40b3a2429c 100644 --- a/pandas/io/clipboards.py +++ b/pandas/io/clipboards.py @@ -1,6 +1,7 @@ """ io on the clipboard """ from pandas import compat, get_option, option_context, DataFrame -from pandas.compat import StringIO, PY2 +from pandas.compat import StringIO, PY2, PY3 +import warnings def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover @@ -32,7 +33,7 @@ def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover # try to decode (if needed on PY3) # Strange. linux py33 doesn't complain, win py33 does - if compat.PY3: + if PY3: try: text = compat.bytes_to_str( text, encoding=(kwargs.get('encoding') or @@ -55,11 +56,27 @@ def read_clipboard(sep=r'\s+', **kwargs): # pragma: no cover counts = {x.lstrip().count('\t') for x in lines} if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0: - sep = r'\t' + sep = '\t' + # Edge case where sep is specified to be None, return to default if sep is None and kwargs.get('delim_whitespace') is None: sep = r'\s+' + # Regex separator currently only works with python engine. + # Default to python if separator is multi-character (regex) + if len(sep) > 1 and kwargs.get('engine') is None: + kwargs['engine'] = 'python' + elif len(sep) > 1 and kwargs.get('engine') == 'c': + warnings.warn('read_clipboard with regex separator does not work' + ' properly with c engine') + + # In PY2, the c table reader first encodes text with UTF-8 but Python + # table reader uses the format of the passed string. 
For consistency, + # encode strings for python engine so that output from python and c + # engines produce consistent results + if kwargs.get('engine') == 'python' and PY2: + text = text.encode('utf-8') + return read_table(StringIO(text), sep=sep, **kwargs) @@ -99,7 +116,7 @@ def to_clipboard(obj, excel=True, sep=None, **kwargs): # pragma: no cover if excel: try: if sep is None: - sep = r'\t' + sep = '\t' buf = StringIO() # clipboard_set (pyperclip) expects unicode obj.to_csv(buf, sep=sep, encoding='utf-8', **kwargs) @@ -108,8 +125,11 @@ def to_clipboard(obj, excel=True, sep=None, **kwargs): # pragma: no cover text = text.decode('utf-8') clipboard_set(text) return - except: - pass + except TypeError: + warnings.warn('to_clipboard in excel mode requires a single ' + 'character separator.') + elif sep is not None: + warnings.warn('to_clipboard with excel=False ignores the sep argument') if isinstance(obj, DataFrame): # str(df) has various unhelpful defaults, like truncation diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index 80fddd50fc9a8..a6b331685e72a 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -88,8 +88,6 @@ def check_round_trip_frame(self, data, excel=None, sep=None, tm.assert_frame_equal(data, result, check_dtype=False) # Test that default arguments copy as tab delimited - @pytest.mark.xfail(reason='to_clipboard defaults to space delim. ' - 'Issue in #21104, Fixed in #21111') def test_round_trip_frame(self, df): self.check_round_trip_frame(df) @@ -99,10 +97,6 @@ def test_round_trip_frame_sep(self, df, sep): self.check_round_trip_frame(df, sep=sep) # Test white space separator - @pytest.mark.xfail(reason="Fails on 'delims' df because quote escapes " - "aren't handled correctly in default c engine. Fixed " - "in #21111 by defaulting to python engine for " - "whitespace separator") def test_round_trip_frame_string(self, df): df.to_clipboard(excel=False, sep=None) result = read_clipboard() @@ -111,21 +105,17 @@ def test_round_trip_frame_string(self, df): # Two character separator is not supported in to_clipboard # Test that multi-character separators are not silently passed - @pytest.mark.xfail(reason="Not yet implemented. Fixed in #21111") def test_excel_sep_warning(self, df): with tm.assert_produces_warning(): df.to_clipboard(excel=True, sep=r'\t') # Separator is ignored when excel=False and should produce a warning - @pytest.mark.xfail(reason="Not yet implemented. Fixed in #21111") def test_copy_delim_warning(self, df): with tm.assert_produces_warning(): df.to_clipboard(excel=False, sep='\t') # Tests that the default behavior of to_clipboard is tab # delimited and excel="True" - @pytest.mark.xfail(reason="to_clipboard defaults to space delim. Issue in " - "#21104, Fixed in #21111") @pytest.mark.parametrize('sep', ['\t', None, 'default']) @pytest.mark.parametrize('excel', [True, None, 'default']) def test_clipboard_copy_tabs_default(self, sep, excel, df): @@ -139,10 +129,6 @@ def test_clipboard_copy_tabs_default(self, sep, excel, df): assert clipboard_get() == df.to_csv(sep='\t') # Tests reading of white space separated tables - @pytest.mark.xfail(reason="Fails on 'delims' df because quote escapes " - "aren't handled correctly. in default c engine. 
Fixed " - "in #21111 by defaulting to python engine for " - "whitespace separator") @pytest.mark.parametrize('sep', [None, 'default']) @pytest.mark.parametrize('excel', [False]) def test_clipboard_copy_strings(self, sep, excel, df): @@ -193,8 +179,6 @@ def test_invalid_encoding(self, df): with pytest.raises(NotImplementedError): pd.read_clipboard(encoding='ascii') - @pytest.mark.xfail(reason='to_clipboard defaults to space delim. ' - 'Issue in #21104, Fixed in #21111') @pytest.mark.parametrize('enc', ['UTF-8', 'utf-8', 'utf8']) def test_round_trip_valid_encodings(self, enc, df): self.check_round_trip_frame(df, encoding=enc) From 2fccdedda2c4bb0e5b9edce8269cdecc973b191d Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 2 Jul 2018 17:26:43 +0200 Subject: [PATCH 33/55] BUG: fix reindexing MultiIndex with categorical datetime-like level (#21657) (cherry picked from commit 1cc547185b92073a3465ea105055d7791e9e6c48) --- doc/source/whatsnew/v0.23.2.txt | 2 ++ pandas/core/indexes/multi.py | 26 +++++++++---------- .../tests/frame/test_axis_select_reindex.py | 15 ++++++++++- pandas/tests/groupby/test_categorical.py | 20 ++++++++++++++ pandas/tests/indexes/test_multi.py | 12 +++++++-- 5 files changed, 58 insertions(+), 17 deletions(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index 608db7487c1e4..bef90506477ed 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -55,6 +55,8 @@ Fixed Regressions - Fixed regression in :meth:`to_csv` when handling file-like object incorrectly (:issue:`21471`) - Re-allowed duplicate level names of a ``MultiIndex``. Accessing a level that has a duplicate name by name still raises an error (:issue:`19029`). - Bug in both :meth:`DataFrame.first_valid_index` and :meth:`Series.first_valid_index` raised for a row index having duplicate values (:issue:`21441`) +- Fixed regression in :meth:`~DataFrame.reindex` and :meth:`~DataFrame.groupby` + with a MultiIndex or multiple keys that contains categorical datetime-like values (:issue:`21390`). - Fixed regression in unary negative operations with object dtype (:issue:`21380`) - Bug in :meth:`Timestamp.ceil` and :meth:`Timestamp.floor` when timestamp is a multiple of the rounding frequency (:issue:`21262`) - Fixed regression in :func:`to_clipboard` that defaulted to copying dataframes with space delimited instead of tab delimited (:issue:`21104`) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 33db32cfe1166..9a4aa15f4cc25 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -11,6 +11,8 @@ from pandas.compat.numpy import function as nv from pandas import compat +from pandas.core.dtypes.dtypes import ( + ExtensionDtype, PandasExtensionDtype) from pandas.core.dtypes.common import ( _ensure_int64, _ensure_platform_int, @@ -808,20 +810,16 @@ def values(self): return self._tuples values = [] - for lev, lab in zip(self.levels, self.labels): - # Need to box timestamps, etc. - box = hasattr(lev, '_box_values') - # Try to minimize boxing. 
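# ----------------------------------------------------------------------
# Illustrative sketch (editorial, not part of the patch), assuming a
# pandas build that includes this change: the rewritten loop in this
# hunk goes through _get_level_values() one level at a time, so
# MultiIndex tuple construction, and with it reindex/groupby, works for
# categorical datetime-like levels again (GH 21390).
import pandas as pd

midx = pd.MultiIndex.from_product(
    [pd.Categorical(['a', 'b']),
     pd.Categorical(pd.date_range('2012-01-01', periods=2, freq='H'))])
df = pd.DataFrame({'a': range(4)}, index=midx)
# Reindexing a frame on such an index raised before the fix.
print(df.reindex(midx))
# ----------------------------------------------------------------------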
- if box and len(lev) > len(lab): - taken = lev._box_values(algos.take_1d(lev._ndarray_values, - lab)) - elif box: - taken = algos.take_1d(lev._box_values(lev._ndarray_values), - lab, - fill_value=lev._na_value) - else: - taken = algos.take_1d(np.asarray(lev._values), lab) - values.append(taken) + + for i in range(self.nlevels): + vals = self._get_level_values(i) + if is_categorical_dtype(vals): + vals = vals.get_values() + if (isinstance(vals.dtype, (PandasExtensionDtype, ExtensionDtype)) + or hasattr(vals, '_box_values')): + vals = vals.astype(object) + vals = np.array(vals, copy=False) + values.append(vals) self._tuples = lib.fast_zip(values) return self._tuples diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index 0e0d6598f5101..004fb4eb0c128 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -10,7 +10,7 @@ import numpy as np from pandas.compat import lrange, lzip, u -from pandas import (compat, DataFrame, Series, Index, MultiIndex, +from pandas import (compat, DataFrame, Series, Index, MultiIndex, Categorical, date_range, isna) import pandas as pd @@ -1129,6 +1129,19 @@ def test_reindex_multi(self): assert_frame_equal(result, expected) + def test_reindex_multi_categorical_time(self): + # https://github.com/pandas-dev/pandas/issues/21390 + midx = pd.MultiIndex.from_product( + [Categorical(['a', 'b', 'c']), + Categorical(date_range("2012-01-01", periods=3, freq='H'))]) + df = pd.DataFrame({'a': range(len(midx))}, index=midx) + df2 = df.iloc[[0, 1, 2, 3, 4, 5, 6, 8]] + + result = df2.reindex(midx) + expected = pd.DataFrame( + {'a': [0, 1, 2, 3, 4, 5, 6, np.nan, 8]}, index=midx) + assert_frame_equal(result, expected) + data = [[1, 2, 3], [1, 2, 3]] @pytest.mark.parametrize('actual', [ diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index cb76195eacf40..d021396a7acb3 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -850,3 +850,23 @@ def test_empty_prod(): result = df.groupby("A", observed=False).B.prod(min_count=1) expected = pd.Series([2, 1, np.nan], expected_idx, name='B') tm.assert_series_equal(result, expected) + + +def test_groupby_multiindex_categorical_datetime(): + # https://github.com/pandas-dev/pandas/issues/21390 + + df = pd.DataFrame({ + 'key1': pd.Categorical(list('abcbabcba')), + 'key2': pd.Categorical( + list(pd.date_range('2018-06-01 00', freq='1T', periods=3)) * 3), + 'values': np.arange(9), + }) + result = df.groupby(['key1', 'key2']).mean() + + idx = pd.MultiIndex.from_product( + [pd.Categorical(['a', 'b', 'c']), + pd.Categorical(pd.date_range('2018-06-01 00', freq='1T', periods=3))], + names=['key1', 'key2']) + expected = pd.DataFrame( + {'values': [0, 4, 8, 3, 4, 5, 6, np.nan, 2]}, index=idx) + assert_frame_equal(result, expected) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 40e64d99ac440..a7e90207c9ad7 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -12,8 +12,8 @@ import pandas as pd -from pandas import (CategoricalIndex, DataFrame, Index, MultiIndex, - compat, date_range, period_range) +from pandas import (CategoricalIndex, Categorical, DataFrame, Index, + MultiIndex, compat, date_range, period_range) from pandas.compat import PY3, long, lrange, lzip, range, u, PYPY from pandas.errors import PerformanceWarning, UnsortedIndexError from 
pandas.core.dtypes.dtypes import CategoricalDtype @@ -1595,6 +1595,14 @@ def test_get_indexer_nearest(self): with pytest.raises(NotImplementedError): midx.get_indexer(['a'], method='pad', tolerance=2) + def test_get_indexer_categorical_time(self): + # https://github.com/pandas-dev/pandas/issues/21390 + midx = MultiIndex.from_product( + [Categorical(['a', 'b', 'c']), + Categorical(date_range("2012-01-01", periods=3, freq='H'))]) + result = midx.get_indexer(midx) + tm.assert_numpy_array_equal(result, np.arange(9, dtype=np.intp)) + def test_hash_collisions(self): # non-smoke test that we don't get hash collisions From a74ee5496900e80bdc653899555fc701ce344bf7 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 2 Jul 2018 10:28:19 -0500 Subject: [PATCH 34/55] BUG: Fix MI repr with long names (#21655) (cherry picked from commit ad76ffcca0d92c3885c279c80701c2f4a3f3f177) --- doc/source/whatsnew/v0.23.2.txt | 1 + pandas/io/formats/format.py | 10 +++++-- pandas/tests/io/formats/test_format.py | 38 ++++++++++++++++++++++++++ 3 files changed, 46 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index bef90506477ed..61d1b83ea8f2e 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -55,6 +55,7 @@ Fixed Regressions - Fixed regression in :meth:`to_csv` when handling file-like object incorrectly (:issue:`21471`) - Re-allowed duplicate level names of a ``MultiIndex``. Accessing a level that has a duplicate name by name still raises an error (:issue:`19029`). - Bug in both :meth:`DataFrame.first_valid_index` and :meth:`Series.first_valid_index` raised for a row index having duplicate values (:issue:`21441`) +- Fixed printing of DataFrames with hierarchical columns with long names (:issue:`21180`) - Fixed regression in :meth:`~DataFrame.reindex` and :meth:`~DataFrame.groupby` with a MultiIndex or multiple keys that contains categorical datetime-like values (:issue:`21390`). - Fixed regression in unary negative operations with object dtype (:issue:`21380`) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 12201f62946ac..c46f4b5ad9c18 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -636,10 +636,14 @@ def to_string(self): mid = int(round(n_cols / 2.)) mid_ix = col_lens.index[mid] col_len = col_lens[mid_ix] - adj_dif -= (col_len + 1) # adjoin adds one + # adjoin adds one + adj_dif -= (col_len + 1) col_lens = col_lens.drop(mid_ix) n_cols = len(col_lens) - max_cols_adj = n_cols - self.index # subtract index column + # subtract index column + max_cols_adj = n_cols - self.index + # GH-21180. Ensure that we print at least two. 
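# ----------------------------------------------------------------------
# Illustrative sketch (editorial, not part of the patch), assuming a
# pandas build that includes this change: GH 21180, where column names
# wider than roughly half the terminal could make the truncation loop
# above drop every printable column. Clamping to at least two columns
# keeps frames like this one representable.
import pandas as pd

columns = pd.MultiIndex.from_tuples([
    ('This is a long title with > 37 chars.', 'cat'),
    ('This is a loooooonger title with > 43 chars.', 'dog'),
])
df = pd.DataFrame(1, index=range(5), columns=columns)
# Even in a narrow terminal, both column groups now appear in the repr.
print(repr(df))
# ----------------------------------------------------------------------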
+ max_cols_adj = max(max_cols_adj, 2) self.max_cols_adj = max_cols_adj # Call again _chk_truncate to cut frame appropriately @@ -778,7 +782,7 @@ def space_format(x, y): str_columns = list(zip(*[[space_format(x, y) for y in x] for x in fmt_columns])) - if self.sparsify: + if self.sparsify and len(str_columns): str_columns = _sparsify(str_columns) str_columns = [list(x) for x in zip(*str_columns)] diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 63b7cb3459069..191e3f37f1c37 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -305,6 +305,44 @@ def test_repr_non_interactive(self): assert not has_truncated_repr(df) assert not has_expanded_repr(df) + def test_repr_truncates_terminal_size(self): + # https://github.com/pandas-dev/pandas/issues/21180 + # TODO: use mock fixutre. + # This is being backported, so doing it directly here. + try: + from unittest import mock + except ImportError: + mock = pytest.importorskip("mock") + + terminal_size = (118, 96) + p1 = mock.patch('pandas.io.formats.console.get_terminal_size', + return_value=terminal_size) + p2 = mock.patch('pandas.io.formats.format.get_terminal_size', + return_value=terminal_size) + + index = range(5) + columns = pd.MultiIndex.from_tuples([ + ('This is a long title with > 37 chars.', 'cat'), + ('This is a loooooonger title with > 43 chars.', 'dog'), + ]) + df = pd.DataFrame(1, index=index, columns=columns) + + with p1, p2: + result = repr(df) + + h1, h2 = result.split('\n')[:2] + assert 'long' in h1 + assert 'loooooonger' in h1 + assert 'cat' in h2 + assert 'dog' in h2 + + # regular columns + df2 = pd.DataFrame({"A" * 41: [1, 2], 'B' * 41: [1, 2]}) + with p1, p2: + result = repr(df2) + + assert df2.columns[0] in result.split('\n')[0] + def test_repr_max_columns_max_rows(self): term_width, term_height = get_terminal_size() if term_width < 10 or term_height < 10: From 1d3766c3fd303672f29be4a71919c37443450ad8 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 5 Jul 2018 21:05:12 +0200 Subject: [PATCH 35/55] DOC: clean-up 0.23.2 whatsnew file (#21750) (cherry picked from commit 2f0773f49a64d23774d66c30988c80541fd7bb6f) --- doc/source/whatsnew/v0.23.2.txt | 40 ++------------------------------- 1 file changed, 2 insertions(+), 38 deletions(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index 61d1b83ea8f2e..2d7808363648b 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -62,19 +62,6 @@ Fixed Regressions - Bug in :meth:`Timestamp.ceil` and :meth:`Timestamp.floor` when timestamp is a multiple of the rounding frequency (:issue:`21262`) - Fixed regression in :func:`to_clipboard` that defaulted to copying dataframes with space delimited instead of tab delimited (:issue:`21104`) -.. 
_whatsnew_0232.performance: - -Performance Improvements -~~~~~~~~~~~~~~~~~~~~~~~~ - -- -- - -Documentation Changes -~~~~~~~~~~~~~~~~~~~~~ - -- -- Build Changes ------------- @@ -86,55 +73,32 @@ Build Changes Bug Fixes ~~~~~~~~~ -**Groupby/Resample/Rolling** - -- -- - -**Timedelta** - -- Bug in :class:`Timedelta` where non-zero timedeltas shorter than 1 microsecond were considered False (:issue:`21484`) - **Conversion** - Bug in constructing :class:`Index` with an iterator or generator (:issue:`21470`) - Bug in :meth:`Series.nlargest` for signed and unsigned integer dtypes when the minimum value is present (:issue:`21426`) - **Indexing** - Bug in :meth:`Index.get_indexer_non_unique` with categorical key (:issue:`21448`) - Bug in comparison operations for :class:`MultiIndex` where error was raised on equality / inequality comparison involving a MultiIndex with ``nlevels == 1`` (:issue:`21149`) - Bug in :meth:`DataFrame.drop` behaviour is not consistent for unique and non-unique indexes (:issue:`21494`) - Bug in :func:`DataFrame.duplicated` with a large number of columns causing a 'maximum recursion depth exceeded' (:issue:`21524`). -- **I/O** - Bug in :func:`read_csv` that caused it to incorrectly raise an error when ``nrows=0``, ``low_memory=True``, and ``index_col`` was not ``None`` (:issue:`21141`) - Bug in :func:`json_normalize` when formatting the ``record_prefix`` with integer columns (:issue:`21536`) -- - -**Plotting** - -- -- - -**Reshaping** - -- -- **Categorical** - Bug in rendering :class:`Series` with ``Categorical`` dtype in rare conditions under Python 2.7 (:issue:`21002`) -- **Timezones** - Bug in :class:`Timestamp` and :class:`DatetimeIndex` where passing a :class:`Timestamp` localized after a DST transition would return a datetime before the DST transition (:issue:`20854`) - Bug in comparing :class:`DataFrame`s with tz-aware :class:`DatetimeIndex` columns with a DST transition that raised a ``KeyError`` (:issue:`19970`) -**Other** +**Timedelta** -- +- Bug in :class:`Timedelta` where non-zero timedeltas shorter than 1 microsecond were considered False (:issue:`21484`) From de4455663215d2a8767fbc14e29f1e5e320603d5 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 5 Jul 2018 15:49:24 -0500 Subject: [PATCH 36/55] RLS: release notes for 0.23.2 (#21752) (cherry picked from commit bd8ba3680eae9c19221ef7200928bcef68508f4a) --- doc/source/release.rst | 34 +++++++++++++++++++++++++++++++++ doc/source/whatsnew.rst | 2 ++ doc/source/whatsnew/v0.23.2.txt | 2 +- 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 2f7eedfbe9a45..08200d4d276cc 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -37,6 +37,40 @@ analysis / manipulation tool available in any language. * Binary installers on PyPI: https://pypi.org/project/pandas * Documentation: http://pandas.pydata.org +pandas 0.23.2 +------------- + +**Release date**: July 5, 2018 + +This is a minor bug-fix release in the 0.23.x series and includes some small regression fixes +and bug fixes. + +See the :ref:`full whatsnew ` for a list of all the changes. + +Thanks +~~~~~~ + +A total of 17 people contributed to this release. People with a "+" by their +names contributed a patch for the first time. 
+ +* David Krych +* Jacopo Rota + +* Jeff Reback +* Jeremy Schendel +* Joris Van den Bossche +* Kalyan Gokhale +* Matthew Roeschke +* Michael Odintsov + +* Ming Li +* Pietro Battiston +* Tom Augspurger +* Uddeshya Singh +* Vu Le + +* alimcmaster1 + +* david-liu-brattle-1 + +* gfyoung +* jbrockmendel + pandas 0.23.1 ------------- diff --git a/doc/source/whatsnew.rst b/doc/source/whatsnew.rst index eb9211d0ceb02..0972cc9432f8e 100644 --- a/doc/source/whatsnew.rst +++ b/doc/source/whatsnew.rst @@ -18,6 +18,8 @@ What's New These are new features and improvements of note in each release. +.. include:: whatsnew/v0.23.2.txt + .. include:: whatsnew/v0.23.1.txt .. include:: whatsnew/v0.23.0.txt diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index 2d7808363648b..bd86576ad8586 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -64,7 +64,7 @@ Fixed Regressions Build Changes -------------- +~~~~~~~~~~~~~ - The source and binary distributions no longer include test data files, resulting in smaller download sizes. Tests relying on these data files will be skipped when using ``pandas.test()``. (:issue:`19320`) From 9b0f560a73d11b2fa72c48d7fd16126b5137f349 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 5 Jul 2018 17:04:24 -0500 Subject: [PATCH 37/55] RLS: 0.23.2 From e2f65df75efbfbb914f22605d139f73967211905 Mon Sep 17 00:00:00 2001 From: "meeseeksdev[bot]" Date: Fri, 6 Jul 2018 11:32:05 -0500 Subject: [PATCH 38/55] Backport PR #21771: Whatsnew note for v0.23.3 (#21772) --- doc/source/whatsnew/v0.23.3.txt | 55 +++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 doc/source/whatsnew/v0.23.3.txt diff --git a/doc/source/whatsnew/v0.23.3.txt b/doc/source/whatsnew/v0.23.3.txt new file mode 100644 index 0000000000000..d308cf7a3cfac --- /dev/null +++ b/doc/source/whatsnew/v0.23.3.txt @@ -0,0 +1,55 @@ +.. _whatsnew_0233: + +v0.23.3 +------- + +This is a minor bug-fix release in the 0.23.x series and includes some small regression fixes +and bug fixes. We recommend that all users upgrade to this version. + + +.. contents:: What's new in v0.23.3 + :local: + :backlinks: none + +.. _whatsnew_0233.fixed_regressions: + +Fixed Regressions +~~~~~~~~~~~~~~~~~ + +- +- + +.. _whatsnew_0233.bug_fixes: + +Bug Fixes +~~~~~~~~~ + +**Conversion** + +- +- + +**Indexing** + +- +- + +**I/O** + +- +- + +**Categorical** + +- +- + +**Timezones** + +- +- + +**Timedelta** + +- +- From d2b7b2b2913d5da18f8df476a51b7f2f521ed99d Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 7 Jul 2018 09:31:26 -0500 Subject: [PATCH 39/55] 0.23.3 fixup (#21788) * Move 0.23.3 to 0.23.4 * 0.23.3 whatsnew (cherry picked from commit a3f8f14b24032151ba57c36f0a70192e13bfd116) --- doc/source/whatsnew/v0.23.3.txt | 56 +++--------------------------- doc/source/whatsnew/v0.23.4.txt | 60 +++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 52 deletions(-) create mode 100644 doc/source/whatsnew/v0.23.4.txt diff --git a/doc/source/whatsnew/v0.23.3.txt b/doc/source/whatsnew/v0.23.3.txt index d308cf7a3cfac..b8adce27d2523 100644 --- a/doc/source/whatsnew/v0.23.3.txt +++ b/doc/source/whatsnew/v0.23.3.txt @@ -1,55 +1,7 @@ .. _whatsnew_0233: -v0.23.3 -------- +v0.23.3 (July 7, 2018) +---------------------- -This is a minor bug-fix release in the 0.23.x series and includes some small regression fixes -and bug fixes. We recommend that all users upgrade to this version. - - -.. contents:: What's new in v0.23.3 - :local: - :backlinks: none - -.. 
_whatsnew_0233.fixed_regressions: - -Fixed Regressions -~~~~~~~~~~~~~~~~~ - -- -- - -.. _whatsnew_0233.bug_fixes: - -Bug Fixes -~~~~~~~~~ - -**Conversion** - -- -- - -**Indexing** - -- -- - -**I/O** - -- -- - -**Categorical** - -- -- - -**Timezones** - -- -- - -**Timedelta** - -- -- +This release fixes a build issue with the sdist for Python 3.7 (:issue:`21785`) +There are no other changes. diff --git a/doc/source/whatsnew/v0.23.4.txt b/doc/source/whatsnew/v0.23.4.txt new file mode 100644 index 0000000000000..a88c22e3d01f7 --- /dev/null +++ b/doc/source/whatsnew/v0.23.4.txt @@ -0,0 +1,60 @@ +.. _whatsnew_0234: + +v0.23.4 +------- + +This is a minor bug-fix release in the 0.23.x series and includes some small regression fixes +and bug fixes. We recommend that all users upgrade to this version. + + +.. contents:: What's new in v0.23.4 + :local: + :backlinks: none + +.. _whatsnew_0234.fixed_regressions: + +Fixed Regressions +~~~~~~~~~~~~~~~~~ + +- +- + +.. _whatsnew_0234.bug_fixes: + +Bug Fixes +~~~~~~~~~ + +**Groupby/Resample/Rolling** + +- Bug where calling :func:`DataFrameGroupBy.agg` with a list of functions including ``ohlc`` as the non-initial element would raise a ``ValueError`` (:issue:`21716`) +- + +**Conversion** + +- +- + +**Indexing** + +- +- + +**I/O** + +- +- + +**Categorical** + +- +- + +**Timezones** + +- +- + +**Timedelta** + +- +- From a24750fbff99971ef3a31b610e74c9a0945f2aa0 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 7 Jul 2018 09:53:25 -0500 Subject: [PATCH 40/55] DOC: Updated whatsnew.rst --- doc/source/whatsnew.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew.rst b/doc/source/whatsnew.rst index 0972cc9432f8e..afd274332b3df 100644 --- a/doc/source/whatsnew.rst +++ b/doc/source/whatsnew.rst @@ -18,6 +18,8 @@ What's New These are new features and improvements of note in each release. +.. include:: whatsnew/v0.23.3.txt + .. include:: whatsnew/v0.23.2.txt .. 
include:: whatsnew/v0.23.1.txt From edb71fda022c6a155717e7a25679040ee0476639 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 7 Jul 2018 10:09:56 -0500 Subject: [PATCH 41/55] RLS: 0.23.3 From b7a2cd4a4c6ea235005aecbc2911034c6064afd3 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 7 Jul 2018 13:57:42 -0500 Subject: [PATCH 42/55] Removed Need for OHLC As First Element if Used in .agg (#21769) (#21794) --- pandas/core/groupby/groupby.py | 6 ++---- pandas/tests/groupby/test_groupby.py | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index df7a5dc9dc173..9d227ef37595f 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -3557,13 +3557,11 @@ def _aggregate_multiple_funcs(self, arg, _level): obj._selection = name results[name] = obj.aggregate(func) - if isinstance(list(compat.itervalues(results))[0], - DataFrame): - + if any(isinstance(x, DataFrame) for x in compat.itervalues(results)): # let higher level handle if _level: return results - return list(compat.itervalues(results))[0] + return DataFrame(results, columns=columns) def _wrap_output(self, output, index, names=None): diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index e05f9de5ea7f4..66577d738dd28 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1674,3 +1674,22 @@ def test_tuple_correct_keyerror(): [3, 4]])) with tm.assert_raises_regex(KeyError, "(7, 8)"): df.groupby((7, 8)).mean() + + +def test_groupby_agg_ohlc_non_first(): + # GH 21716 + df = pd.DataFrame([[1], [1]], columns=['foo'], + index=pd.date_range('2018-01-01', periods=2, freq='D')) + + expected = pd.DataFrame([ + [1, 1, 1, 1, 1], + [1, 1, 1, 1, 1] + ], columns=pd.MultiIndex.from_tuples(( + ('foo', 'ohlc', 'open'), ('foo', 'ohlc', 'high'), + ('foo', 'ohlc', 'low'), ('foo', 'ohlc', 'close'), + ('foo', 'sum', 'foo'))), index=pd.date_range( + '2018-01-01', periods=2, freq='D')) + + result = df.groupby(pd.Grouper(freq='D')).agg(['sum', 'ohlc']) + + tm.assert_frame_equal(result, expected) From 5609eff083baeacbfc80ce9c3a086c7530a7f2b4 Mon Sep 17 00:00:00 2001 From: "meeseeksdev[bot]" Date: Wed, 18 Jul 2018 21:41:02 -0400 Subject: [PATCH 43/55] Backport PR #21921: BUG:Clip with a list-like threshold with a nan is broken (GH19992) (#21967) --- doc/source/whatsnew/v0.23.4.txt | 4 ++++ pandas/core/generic.py | 6 ++++-- pandas/tests/frame/test_analytics.py | 18 ++++++++++++++---- pandas/tests/series/test_analytics.py | 8 ++++++-- 4 files changed, 28 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v0.23.4.txt b/doc/source/whatsnew/v0.23.4.txt index a88c22e3d01f7..5e19ab491647d 100644 --- a/doc/source/whatsnew/v0.23.4.txt +++ b/doc/source/whatsnew/v0.23.4.txt @@ -58,3 +58,7 @@ Bug Fixes - - + +**Missing** + +- Bug in :func:`Series.clip` and :func:`DataFrame.clip` cannot accept list-like threshold containing ``NaN`` (:issue:`19992`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 02462218e8b02..facc709877285 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6433,9 +6433,11 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, # GH 17276 # numpy doesn't like NaN as a clip value # so ignore - if np.any(pd.isnull(lower)): + # GH 19992 + # numpy doesn't drop a list-like bound containing NaN + if not is_list_like(lower) and np.any(pd.isnull(lower)): lower = None - if np.any(pd.isnull(upper)): + if 
not is_list_like(upper) and np.any(pd.isnull(upper)): upper = None # GH 2747 (arguments were reversed) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 437d3a9d24730..415ae982673ee 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -2195,13 +2195,23 @@ def test_clip_with_na_args(self): """Should process np.nan argument as None """ # GH # 17276 tm.assert_frame_equal(self.frame.clip(np.nan), self.frame) - tm.assert_frame_equal(self.frame.clip(upper=[1, 2, np.nan]), - self.frame) - tm.assert_frame_equal(self.frame.clip(lower=[1, np.nan, 3]), - self.frame) tm.assert_frame_equal(self.frame.clip(upper=np.nan, lower=np.nan), self.frame) + # GH #19992 + df = DataFrame({'col_0': [1, 2, 3], 'col_1': [4, 5, 6], + 'col_2': [7, 8, 9]}) + + result = df.clip(lower=[4, 5, np.nan], axis=0) + expected = DataFrame({'col_0': [4, 5, np.nan], 'col_1': [4, 5, np.nan], + 'col_2': [7, 8, np.nan]}) + tm.assert_frame_equal(result, expected) + + result = df.clip(lower=[4, 5, np.nan], axis=1) + expected = DataFrame({'col_0': [4, 4, 4], 'col_1': [5, 5, 6], + 'col_2': [np.nan, np.nan, np.nan]}) + tm.assert_frame_equal(result, expected) + # Matrix-like def test_dot(self): a = DataFrame(np.random.randn(3, 4), index=['a', 'b', 'c'], diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 1e6ea96a5de51..bcf209521f913 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1140,11 +1140,15 @@ def test_clip_with_na_args(self): s = Series([1, 2, 3]) assert_series_equal(s.clip(np.nan), Series([1, 2, 3])) - assert_series_equal(s.clip(upper=[1, 1, np.nan]), Series([1, 2, 3])) - assert_series_equal(s.clip(lower=[1, np.nan, 1]), Series([1, 2, 3])) assert_series_equal(s.clip(upper=np.nan, lower=np.nan), Series([1, 2, 3])) + # GH #19992 + assert_series_equal(s.clip(lower=[0, 4, np.nan]), + Series([1, 4, np.nan])) + assert_series_equal(s.clip(upper=[1, np.nan, 1]), + Series([1, np.nan, 1])) + def test_clip_against_series(self): # GH #6966 From 6a0a95058659cec7515b0233d7795417dfb074fe Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Fri, 20 Jul 2018 05:28:01 -0700 Subject: [PATCH 44/55] Backport PR #21966: Fix memory leak in roll_quantile (#21973) --- doc/source/whatsnew/v0.23.4.txt | 1 + pandas/_libs/window.pyx | 2 ++ 2 files changed, 3 insertions(+) diff --git a/doc/source/whatsnew/v0.23.4.txt b/doc/source/whatsnew/v0.23.4.txt index 5e19ab491647d..a30fbc75f11f8 100644 --- a/doc/source/whatsnew/v0.23.4.txt +++ b/doc/source/whatsnew/v0.23.4.txt @@ -27,6 +27,7 @@ Bug Fixes **Groupby/Resample/Rolling** - Bug where calling :func:`DataFrameGroupBy.agg` with a list of functions including ``ohlc`` as the non-initial element would raise a ``ValueError`` (:issue:`21716`) +- Bug in ``roll_quantile`` caused a memory leak when calling ``.rolling(...).quantile(q)`` with ``q`` in (0,1) (:issue:`21965`) - **Conversion** diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index 5121d293efcb6..a77433e5d1115 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -1482,6 +1482,8 @@ def roll_quantile(ndarray[float64_t, cast=True] input, int64_t win, else: output[i] = NaN + skiplist_destroy(skiplist) + return output From 14e1985f7a34b311cfb57c6f4f1bfe407e64bc75 Mon Sep 17 00:00:00 2001 From: chris-b1 Date: Thu, 26 Jul 2018 12:32:29 -0500 Subject: [PATCH 45/55] BUG: rolling with MSVC 2017 build (#21813) 
* Appveyor 3.7 * ci package list * change image type * try hack fix * lint * use isnan on problem function * use numpy compat isnan * use right isnan * work around OSX math undefs * cleanup const * fix reversion * ... (cherry picked from commit 7a2fbce899aad302891ff9a95aeb1bd55efe533a) --- appveyor.yml | 2 ++ doc/source/whatsnew/v0.23.4.txt | 2 +- pandas/_libs/src/headers/cmath | 1 + pandas/_libs/window.pyx | 21 +++++++++++---------- 4 files changed, 15 insertions(+), 11 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index f70fc829ec971..c6199c1493f22 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -20,12 +20,14 @@ environment: matrix: - CONDA_ROOT: "C:\\Miniconda3_64" + APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 PYTHON_VERSION: "3.6" PYTHON_ARCH: "64" CONDA_PY: "36" CONDA_NPY: "113" - CONDA_ROOT: "C:\\Miniconda3_64" + APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015 PYTHON_VERSION: "2.7" PYTHON_ARCH: "64" CONDA_PY: "27" diff --git a/doc/source/whatsnew/v0.23.4.txt b/doc/source/whatsnew/v0.23.4.txt index a30fbc75f11f8..7890d199564f6 100644 --- a/doc/source/whatsnew/v0.23.4.txt +++ b/doc/source/whatsnew/v0.23.4.txt @@ -16,7 +16,7 @@ and bug fixes. We recommend that all users upgrade to this version. Fixed Regressions ~~~~~~~~~~~~~~~~~ -- +- Python 3.7 with Windows gave all missing values for rolling variance calculations (:issue:`21813`) - .. _whatsnew_0234.bug_fixes: diff --git a/pandas/_libs/src/headers/cmath b/pandas/_libs/src/headers/cmath index d8e2239406cae..2bccf9bb13d77 100644 --- a/pandas/_libs/src/headers/cmath +++ b/pandas/_libs/src/headers/cmath @@ -6,6 +6,7 @@ #if defined(_MSC_VER) && (_MSC_VER < 1800) #include namespace std { + __inline int isnan(double x) { return _isnan(x); } __inline int signbit(double num) { return _copysign(1.0, num) < 0; } } #else diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index a77433e5d1115..6954094b46e69 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -14,6 +14,7 @@ cnp.import_array() cdef extern from "../src/headers/cmath" namespace "std": + bint isnan(double) nogil int signbit(double) nogil double sqrt(double x) nogil @@ -654,16 +655,16 @@ cdef inline void add_var(double val, double *nobs, double *mean_x, double *ssqdm_x) nogil: """ add a value from the var calc """ cdef double delta - - # Not NaN - if val == val: - nobs[0] = nobs[0] + 1 - - # a part of Welford's method for the online variance-calculation - # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance - delta = val - mean_x[0] - mean_x[0] = mean_x[0] + delta / nobs[0] - ssqdm_x[0] = ssqdm_x[0] + ((nobs[0] - 1) * delta ** 2) / nobs[0] + # `isnan` instead of equality as fix for GH-21813, msvc 2017 bug + if isnan(val): + return + + nobs[0] = nobs[0] + 1 + # a part of Welford's method for the online variance-calculation + # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance + delta = val - mean_x[0] + mean_x[0] = mean_x[0] + delta / nobs[0] + ssqdm_x[0] = ssqdm_x[0] + ((nobs[0] - 1) * delta ** 2) / nobs[0] cdef inline void remove_var(double val, double *nobs, double *mean_x, From 398582616c434330283d82fd029ace7dbd3c6993 Mon Sep 17 00:00:00 2001 From: h-vetinari <33685575+h-vetinari@users.noreply.github.com> Date: Tue, 17 Jul 2018 14:01:51 +0200 Subject: [PATCH 46/55] DOC add Python2.7 warning to recent whatsnew; include 23.3 (#21944) (cherry picked from commit 4802002ab0564ae384e425c074fde688a228a43f) --- doc/source/whatsnew/v0.23.1.txt | 5 +++++ doc/source/whatsnew/v0.23.2.txt | 4 ++++ 
doc/source/whatsnew/v0.23.4.txt | 4 ++++ 3 files changed, 13 insertions(+) diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.txt index a52ba22cf36d2..9f8635743ea6a 100644 --- a/doc/source/whatsnew/v0.23.1.txt +++ b/doc/source/whatsnew/v0.23.1.txt @@ -6,6 +6,11 @@ v0.23.1 This is a minor bug-fix release in the 0.23.x series and includes some small regression fixes and bug fixes. We recommend that all users upgrade to this version. +.. warning:: + + Starting January 1, 2019, pandas feature releases will support Python 3 only. + See :ref:`install.dropping-27` for more. + .. contents:: What's new in v0.23.1 :local: :backlinks: none diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index bd86576ad8586..77ad860fc4e8e 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -11,6 +11,10 @@ and bug fixes. We recommend that all users upgrade to this version. Pandas 0.23.2 is first pandas release that's compatible with Python 3.7 (:issue:`20552`) +.. warning:: + + Starting January 1, 2019, pandas feature releases will support Python 3 only. + See :ref:`install.dropping-27` for more. .. contents:: What's new in v0.23.2 :local: diff --git a/doc/source/whatsnew/v0.23.4.txt b/doc/source/whatsnew/v0.23.4.txt index 7890d199564f6..c17f4ffdd6b8e 100644 --- a/doc/source/whatsnew/v0.23.4.txt +++ b/doc/source/whatsnew/v0.23.4.txt @@ -6,6 +6,10 @@ v0.23.4 This is a minor bug-fix release in the 0.23.x series and includes some small regression fixes and bug fixes. We recommend that all users upgrade to this version. +.. warning:: + + Starting January 1, 2019, pandas feature releases will support Python 3 only. + See :ref:`install.dropping-27` for more. .. contents:: What's new in v0.23.4 :local: From 12cfef9f80732279687df4ca701967c0ead0a1cf Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 2 Aug 2018 15:26:40 -0500 Subject: [PATCH 47/55] 0.23.4 whatsnew (#22177) (cherry picked from commit e4381b6e7c3cf1c6f424d01e3dc2613710d79b0d) --- doc/source/whatsnew/v0.23.4.txt | 36 ++------------------------------- 1 file changed, 2 insertions(+), 34 deletions(-) diff --git a/doc/source/whatsnew/v0.23.4.txt b/doc/source/whatsnew/v0.23.4.txt index c17f4ffdd6b8e..9a3ad3f61ee49 100644 --- a/doc/source/whatsnew/v0.23.4.txt +++ b/doc/source/whatsnew/v0.23.4.txt @@ -1,7 +1,7 @@ .. _whatsnew_0234: -v0.23.4 -------- +v0.23.4 (August 3, 2018) +------------------------ This is a minor bug-fix release in the 0.23.x series and includes some small regression fixes and bug fixes. We recommend that all users upgrade to this version. @@ -21,7 +21,6 @@ Fixed Regressions ~~~~~~~~~~~~~~~~~ - Python 3.7 with Windows gave all missing values for rolling variance calculations (:issue:`21813`) -- .. 
_whatsnew_0234.bug_fixes: @@ -32,37 +31,6 @@ Bug Fixes - Bug where calling :func:`DataFrameGroupBy.agg` with a list of functions including ``ohlc`` as the non-initial element would raise a ``ValueError`` (:issue:`21716`) - Bug in ``roll_quantile`` caused a memory leak when calling ``.rolling(...).quantile(q)`` with ``q`` in (0,1) (:issue:`21965`) -- - -**Conversion** - -- -- - -**Indexing** - -- -- - -**I/O** - -- -- - -**Categorical** - -- -- - -**Timezones** - -- -- - -**Timedelta** - -- -- **Missing** From b9bacc95c013db0c5cb23a6ddc5496c39668a7c4 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 28 Jul 2018 09:16:07 -0400 Subject: [PATCH 48/55] TST: skip pytables test with not-updated pytables conda package (#22099) (cherry picked from commit 017e910a90cbb29c0f844f4d6aa966ebb5cd680a) --- pandas/tests/io/test_pytables.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 7dafc9603f96d..3c6b52074763e 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -14,7 +14,7 @@ from pandas import (Series, DataFrame, Panel, MultiIndex, Int64Index, RangeIndex, Categorical, bdate_range, date_range, timedelta_range, Index, DatetimeIndex, - isna, compat, concat, Timestamp) + isna, compat, concat, Timestamp, _np_version_under1p15) import pandas.util.testing as tm import pandas.util._test_decorators as td @@ -2140,6 +2140,10 @@ def test_unimplemented_dtypes_table_columns(self): # this fails because we have a date in the object block...... pytest.raises(TypeError, store.append, 'df_unimplemented', df) + @pytest.mark.skipif( + not _np_version_under1p15, + reason=("pytables conda build package needs build " + "with numpy 1.15: gh-22098")) def test_calendar_roundtrip_issue(self): # 8591 From 0409521665bd436a10aea7e06336066bf07ff057 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 3 Aug 2018 12:19:26 -0500 Subject: [PATCH 49/55] RLS: 0.23.4 From c420e75851361025c8f20c5d00c44c7feef56d5a Mon Sep 17 00:00:00 2001 From: William Ayd Date: Tue, 7 Aug 2018 09:23:03 -0700 Subject: [PATCH 50/55] Added whatsnew for v0.23.5 (#22233) --- doc/source/whatsnew/v0.23.5.txt | 39 +++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 doc/source/whatsnew/v0.23.5.txt diff --git a/doc/source/whatsnew/v0.23.5.txt b/doc/source/whatsnew/v0.23.5.txt new file mode 100644 index 0000000000000..ee0ee4259f86d --- /dev/null +++ b/doc/source/whatsnew/v0.23.5.txt @@ -0,0 +1,39 @@ +.. _whatsnew_0235: + +v0.23.5 (TBD 0, 2018) +--------------------- + +This is a minor bug-fix release in the 0.23.x series and includes some small regression fixes +and bug fixes. We recommend that all users upgrade to this version. + +.. warning:: + + Starting January 1, 2019, pandas feature releases will support Python 3 only. + See :ref:`install.dropping-27` for more. + +.. contents:: What's new in v0.23.5 + :local: + :backlinks: none + +.. _whatsnew_0235.fixed_regressions: + +Fixed Regressions +~~~~~~~~~~~~~~~~~ + +- +- + +.. 
_whatsnew_0235.bug_fixes: + +Bug Fixes +~~~~~~~~~ + +**Groupby/Resample/Rolling** + +- +- + +**Missing** + +- +- From faa199298eaeb1173571da47eaaecaf3b455c7d3 Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Thu, 9 Aug 2018 08:45:12 -0600 Subject: [PATCH 51/55] Backport PR #22169: BUG: Fix using "inf"/"-inf" in na_values for csv with int index column (#22259) --- doc/source/whatsnew/v0.23.5.txt | 4 ++++ pandas/core/algorithms.py | 4 ++-- pandas/tests/io/parser/na_values.py | 11 +++++++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.23.5.txt b/doc/source/whatsnew/v0.23.5.txt index ee0ee4259f86d..6a36adb915b3c 100644 --- a/doc/source/whatsnew/v0.23.5.txt +++ b/doc/source/whatsnew/v0.23.5.txt @@ -37,3 +37,7 @@ Bug Fixes - - + +**I/O** + +- Bug in :func:`read_csv` that caused it to raise ``OverflowError`` when trying to use 'inf' as ``na_value`` with integer index column (:issue:`17128`) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index bcde32696c1ff..9d8d208d2d5c1 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -95,7 +95,7 @@ def _ensure_data(values, dtype=None): values = _ensure_float64(values) return values, 'float64', 'float64' - except (TypeError, ValueError): + except (TypeError, ValueError, OverflowError): # if we are trying to coerce to a dtype # and it is incompat this will fall thru to here return _ensure_object(values), 'object', 'object' @@ -429,7 +429,7 @@ def isin(comps, values): values = values.astype('int64', copy=False) comps = comps.astype('int64', copy=False) f = lambda x, y: htable.ismember_int64(x, y) - except (TypeError, ValueError): + except (TypeError, ValueError, OverflowError): values = values.astype(object) comps = comps.astype(object) diff --git a/pandas/tests/io/parser/na_values.py b/pandas/tests/io/parser/na_values.py index d2c3f82e95c4d..cc224efd533b7 100644 --- a/pandas/tests/io/parser/na_values.py +++ b/pandas/tests/io/parser/na_values.py @@ -369,3 +369,14 @@ def test_no_na_filter_on_index(self): expected = DataFrame({"a": [1, 4], "c": [3, 6]}, index=Index([np.nan, 5.0], name="b")) tm.assert_frame_equal(out, expected) + + def test_inf_na_values_with_int_index(self): + # see gh-17128 + data = "idx,col1,col2\n1,3,4\n2,inf,-inf" + + # Don't fail with OverflowError with infs and integer index column + out = self.read_csv(StringIO(data), index_col=[0], + na_values=['inf', '-inf']) + expected = DataFrame({"col1": [3, np.nan], "col2": [4, np.nan]}, + index=Index([1, 2], name="idx")) + tm.assert_frame_equal(out, expected) From 11c0523f8fffe33131890d6bd2c71f8edacea5c4 Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Thu, 9 Aug 2018 08:45:29 -0600 Subject: [PATCH 52/55] Backport PR #22253: Resampling with NaT in TimedeltaIndex raises MemoryError (#22258) --- doc/source/whatsnew/v0.23.5.txt | 2 +- pandas/core/resample.py | 3 +-- pandas/tests/test_resample.py | 10 ++++++++++ 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.23.5.txt b/doc/source/whatsnew/v0.23.5.txt index 6a36adb915b3c..304ab12752ad4 100644 --- a/doc/source/whatsnew/v0.23.5.txt +++ b/doc/source/whatsnew/v0.23.5.txt @@ -30,7 +30,7 @@ Bug Fixes **Groupby/Resample/Rolling** -- +- Bug in :meth:`DataFrame.resample` when resampling ``NaT`` in ``TimeDeltaIndex`` (:issue:`13223`). 
- **Missing** diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 0707cc756682e..e6b9f88c52cd7 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1383,8 +1383,7 @@ def _get_time_delta_bins(self, ax): data=[], freq=self.freq, name=ax.name) return binner, [], labels - start = ax[0] - end = ax[-1] + start, end = ax.min(), ax.max() labels = binner = TimedeltaIndex(start=start, end=end, freq=self.freq, diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index c1257cce9a9a4..bcc50a25623a1 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -2870,6 +2870,16 @@ def test_asfreq_bug(self): freq='1T')) assert_frame_equal(result, expected) + def test_resample_with_nat(self): + # GH 13223 + index = pd.to_timedelta(['0s', pd.NaT, '2s']) + result = DataFrame({'value': [2, 3, 5]}, index).resample('1s').mean() + expected = DataFrame({'value': [2.5, np.nan, 5.0]}, + index=timedelta_range('0 day', + periods=3, + freq='1S')) + assert_frame_equal(result, expected) + class TestResamplerGrouper(object): From 932de54ac027b9cc8147642ea4448a63fdda33b2 Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Mon, 20 Aug 2018 04:04:28 -0700 Subject: [PATCH 53/55] Backport PR #22424: CI: add missing tzlocal dependency (rpy2, doc build) (#22425) --- ci/travis-36-doc.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/travis-36-doc.yaml b/ci/travis-36-doc.yaml index c22dddbe0ba3f..8705b82412e7c 100644 --- a/ci/travis-36-doc.yaml +++ b/ci/travis-36-doc.yaml @@ -36,6 +36,7 @@ dependencies: - sphinx - sqlalchemy - statsmodels + - tzlocal - xarray - xlrd - xlsxwriter From 183e92f1309a15e34c890e6c18dd5c7c53f61210 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Tue, 11 Sep 2018 09:40:58 -0700 Subject: [PATCH 54/55] CI / BLD: Various CI Backports (#22637) * CI: Bump NumPy to 1.9.3 Backport of gh-22499. * BLD: Fix openpyxl to 2.5.5 Backport of gh-22601. * CI: Resolve timeout issue on Travis Backported from gh-22429. * CI: Migrate to CircleCI 2.0 Backport of gh-21814. * Upgrade Cython to >=0.28.2 Backported from gh-21688. * TST: Patch locale handling Backported from gh-21739. Backport of gh-22213. 
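Note on the locale handling change in this backport: it guards both the locale.setlocale() call and the follow-up locale.getlocale() read, because some platforms accept a locale string but then raise ValueError when asked to report it back (gh-22129). A minimal standalone sketch of that guard pattern — illustrative only; temp_locale and locale_is_usable are made-up names here, the real helpers are set_locale and can_set_locale in pandas.util.testing as patched below:

    import locale
    from contextlib import contextmanager

    @contextmanager
    def temp_locale(new_locale, lc_var=locale.LC_ALL):
        # Remember the current setting so it can be restored on exit.
        current = locale.setlocale(lc_var)
        try:
            locale.setlocale(lc_var, new_locale)
            yield
        finally:
            locale.setlocale(lc_var, current)

    def locale_is_usable(new_locale):
        # A locale only counts as usable if it can be both set and read
        # back: setlocale() may raise locale.Error, and getlocale() may
        # raise ValueError for locales the platform half-supports.
        try:
            with temp_locale(new_locale):
                locale.getlocale()
        except (ValueError, locale.Error):
            return False
        return True

    print(locale_is_usable(''))                     # default locale -> True
    print(locale_is_usable('no_such_locale.UTF-8')) # -> False
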
--- .circleci/config.yml | 147 ++++++++++++++++++ ci/appveyor-27.yaml | 2 +- ci/appveyor-36.yaml | 2 +- ci/circle-27-compat.yaml | 6 +- ci/circle-35-ascii.yaml | 2 +- ci/circle-36-locale.yaml | 2 +- ci/circle-36-locale_slow.yaml | 2 +- ci/install_circle.sh | 19 +-- ci/install_db_circle.sh | 8 - ci/requirements-optional-conda.txt | 2 +- ci/requirements-optional-pip.txt | 4 +- ci/run_circle.sh | 2 +- ci/travis-27-locale.yaml | 2 +- ci/travis-27.yaml | 1 + ci/travis-35-osx.yaml | 2 +- ci/travis-36-doc.yaml | 2 +- ci/travis-36-slow.yaml | 2 +- ci/travis-36.yaml | 2 +- circle.yml | 38 ----- pandas/tests/indexes/datetimes/test_misc.py | 19 ++- pandas/tests/io/json/test_compression.py | 2 + pandas/tests/io/json/test_pandas.py | 2 + pandas/tests/io/parser/test_network.py | 2 + pandas/tests/io/test_excel.py | 1 + .../tests/scalar/timestamp/test_timestamp.py | 20 ++- pandas/tests/series/test_datetime_values.py | 20 ++- pandas/tests/util/test_util.py | 22 ++- pandas/util/testing.py | 32 ++-- 28 files changed, 272 insertions(+), 95 deletions(-) create mode 100644 .circleci/config.yml delete mode 100755 ci/install_db_circle.sh delete mode 100644 circle.yml diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 0000000000000..e947f30d285cd --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,147 @@ +version: 2 +jobs: + + # -------------------------------------------------------------------------- + # 0. py27_compat + # -------------------------------------------------------------------------- + py27_compat: + docker: + - image: continuumio/miniconda:latest + # databases configuration + - image: circleci/postgres:9.6.5-alpine-ram + environment: + POSTGRES_USER: postgres + POSTGRES_DB: pandas_nosetest + - image: circleci/mysql:8-ram + environment: + MYSQL_USER: "root" + MYSQL_HOST: "localhost" + MYSQL_ALLOW_EMPTY_PASSWORD: "true" + MYSQL_DATABASE: "pandas_nosetest" + environment: + JOB: "2.7_COMPAT" + ENV_FILE: "ci/circle-27-compat.yaml" + LOCALE_OVERRIDE: "it_IT.UTF-8" + MINICONDA_DIR: /home/ubuntu/miniconda3 + steps: + - checkout + - run: + name: build + command: | + ./ci/install_circle.sh + ./ci/show_circle.sh + - run: + name: test + command: ./ci/run_circle.sh --skip-slow --skip-network + + # -------------------------------------------------------------------------- + # 1. py36_locale + # -------------------------------------------------------------------------- + py36_locale: + docker: + - image: continuumio/miniconda:latest + # databases configuration + - image: circleci/postgres:9.6.5-alpine-ram + environment: + POSTGRES_USER: postgres + POSTGRES_DB: pandas_nosetest + - image: circleci/mysql:8-ram + environment: + MYSQL_USER: "root" + MYSQL_HOST: "localhost" + MYSQL_ALLOW_EMPTY_PASSWORD: "true" + MYSQL_DATABASE: "pandas_nosetest" + + environment: + JOB: "3.6_LOCALE" + ENV_FILE: "ci/circle-36-locale.yaml" + LOCALE_OVERRIDE: "zh_CN.UTF-8" + MINICONDA_DIR: /home/ubuntu/miniconda3 + steps: + - checkout + - run: + name: build + command: | + ./ci/install_circle.sh + ./ci/show_circle.sh + - run: + name: test + command: ./ci/run_circle.sh --skip-slow --skip-network + + # -------------------------------------------------------------------------- + # 2. 
py36_locale_slow + # -------------------------------------------------------------------------- + py36_locale_slow: + docker: + - image: continuumio/miniconda:latest + # databases configuration + - image: circleci/postgres:9.6.5-alpine-ram + environment: + POSTGRES_USER: postgres + POSTGRES_DB: pandas_nosetest + - image: circleci/mysql:8-ram + environment: + MYSQL_USER: "root" + MYSQL_HOST: "localhost" + MYSQL_ALLOW_EMPTY_PASSWORD: "true" + MYSQL_DATABASE: "pandas_nosetest" + + environment: + JOB: "3.6_LOCALE_SLOW" + ENV_FILE: "ci/circle-36-locale_slow.yaml" + LOCALE_OVERRIDE: "zh_CN.UTF-8" + MINICONDA_DIR: /home/ubuntu/miniconda3 + steps: + - checkout + - run: + name: build + command: | + ./ci/install_circle.sh + ./ci/show_circle.sh + - run: + name: test + command: ./ci/run_circle.sh --only-slow --skip-network + + # -------------------------------------------------------------------------- + # 3. py35_ascii + # -------------------------------------------------------------------------- + py35_ascii: + docker: + - image: continuumio/miniconda:latest + # databases configuration + - image: circleci/postgres:9.6.5-alpine-ram + environment: + POSTGRES_USER: postgres + POSTGRES_DB: pandas_nosetest + - image: circleci/mysql:8-ram + environment: + MYSQL_USER: "root" + MYSQL_HOST: "localhost" + MYSQL_ALLOW_EMPTY_PASSWORD: "true" + MYSQL_DATABASE: "pandas_nosetest" + + environment: + JOB: "3.5_ASCII" + ENV_FILE: "ci/circle-35-ascii.yaml" + LOCALE_OVERRIDE: "C" + MINICONDA_DIR: /home/ubuntu/miniconda3 + steps: + - checkout + - run: + name: build + command: | + ./ci/install_circle.sh + ./ci/show_circle.sh + - run: + name: test + command: ./ci/run_circle.sh --skip-slow --skip-network + + +workflows: + version: 2 + build_and_test: + jobs: + - py27_compat + - py36_locale + - py36_locale_slow + - py35_ascii diff --git a/ci/appveyor-27.yaml b/ci/appveyor-27.yaml index 84107c605b14f..e47ebf75344fa 100644 --- a/ci/appveyor-27.yaml +++ b/ci/appveyor-27.yaml @@ -12,7 +12,7 @@ dependencies: - matplotlib - numexpr - numpy=1.10* - - openpyxl + - openpyxl=2.5.5 - pytables==3.2.2 - python=2.7.* - pytz diff --git a/ci/appveyor-36.yaml b/ci/appveyor-36.yaml index 5e370de39958a..d007f04ca0720 100644 --- a/ci/appveyor-36.yaml +++ b/ci/appveyor-36.yaml @@ -10,7 +10,7 @@ dependencies: - matplotlib - numexpr - numpy=1.13* - - openpyxl + - openpyxl=2.5.5 - pyarrow - pytables - python-dateutil diff --git a/ci/circle-27-compat.yaml b/ci/circle-27-compat.yaml index 81a48d4edf11c..e037877819b14 100644 --- a/ci/circle-27-compat.yaml +++ b/ci/circle-27-compat.yaml @@ -4,11 +4,11 @@ channels: - conda-forge dependencies: - bottleneck=1.0.0 - - cython=0.24 + - cython=0.28.2 - jinja2=2.8 - numexpr=2.4.4 # we test that we correctly don't use an unsupported numexpr - - numpy=1.9.2 - - openpyxl + - numpy=1.9.3 + - openpyxl=2.5.5 - psycopg2 - pytables=3.2.2 - python-dateutil=2.5.0 diff --git a/ci/circle-35-ascii.yaml b/ci/circle-35-ascii.yaml index 602c414b49bb2..745678791458d 100644 --- a/ci/circle-35-ascii.yaml +++ b/ci/circle-35-ascii.yaml @@ -2,7 +2,7 @@ name: pandas channels: - defaults dependencies: - - cython + - cython>=0.28.2 - nomkl - numpy - python-dateutil diff --git a/ci/circle-36-locale.yaml b/ci/circle-36-locale.yaml index cc852c1e2aeeb..a85e0b58f5e33 100644 --- a/ci/circle-36-locale.yaml +++ b/ci/circle-36-locale.yaml @@ -13,7 +13,7 @@ dependencies: - nomkl - numexpr - numpy - - openpyxl + - openpyxl=2.5.5 - psycopg2 - pymysql - pytables diff --git a/ci/circle-36-locale_slow.yaml b/ci/circle-36-locale_slow.yaml index 
cc852c1e2aeeb..a85e0b58f5e33 100644 --- a/ci/circle-36-locale_slow.yaml +++ b/ci/circle-36-locale_slow.yaml @@ -13,7 +13,7 @@ dependencies: - nomkl - numexpr - numpy - - openpyxl + - openpyxl=2.5.5 - psycopg2 - pymysql - pytables diff --git a/ci/install_circle.sh b/ci/install_circle.sh index 5ffff84c88488..f8bcf6bcffc99 100755 --- a/ci/install_circle.sh +++ b/ci/install_circle.sh @@ -6,14 +6,7 @@ echo "[home_dir: $home_dir]" echo "[ls -ltr]" ls -ltr -echo "[Using clean Miniconda install]" -rm -rf "$MINICONDA_DIR" - -# install miniconda -wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -q -O miniconda.sh || exit 1 -bash miniconda.sh -b -p "$MINICONDA_DIR" || exit 1 - -export PATH="$MINICONDA_DIR/bin:$PATH" +apt-get update -y && apt-get install -y build-essential postgresql-client-9.6 echo "[update conda]" conda config --set ssl_verify false || exit 1 @@ -48,9 +41,17 @@ source $ENVS_FILE # edit the locale override if needed if [ -n "$LOCALE_OVERRIDE" ]; then + + apt-get update && apt-get -y install locales locales-all + + export LANG=$LOCALE_OVERRIDE + export LC_ALL=$LOCALE_OVERRIDE + + python -c "import locale; locale.setlocale(locale.LC_ALL, \"$LOCALE_OVERRIDE\")" || exit 1; + echo "[Adding locale to the first line of pandas/__init__.py]" rm -f pandas/__init__.pyc - sedc="3iimport locale\nlocale.setlocale(locale.LC_ALL, '$LOCALE_OVERRIDE')\n" + sedc="3iimport locale\nlocale.setlocale(locale.LC_ALL, \"$LOCALE_OVERRIDE\")\n" sed -i "$sedc" pandas/__init__.py echo "[head -4 pandas/__init__.py]" head -4 pandas/__init__.py diff --git a/ci/install_db_circle.sh b/ci/install_db_circle.sh deleted file mode 100755 index a00f74f009f54..0000000000000 --- a/ci/install_db_circle.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -echo "installing dbs" -mysql -e 'create database pandas_nosetest;' -psql -c 'create database pandas_nosetest;' -U postgres - -echo "done" -exit 0 diff --git a/ci/requirements-optional-conda.txt b/ci/requirements-optional-conda.txt index e8cfcdf80f2e8..ca60c772392e7 100644 --- a/ci/requirements-optional-conda.txt +++ b/ci/requirements-optional-conda.txt @@ -11,7 +11,7 @@ lxml matplotlib nbsphinx numexpr -openpyxl +openpyxl=2.5.5 pyarrow pymysql pytables diff --git a/ci/requirements-optional-pip.txt b/ci/requirements-optional-pip.txt index 877c52fa0b4fd..a6009c270c2a6 100644 --- a/ci/requirements-optional-pip.txt +++ b/ci/requirements-optional-pip.txt @@ -13,7 +13,7 @@ lxml matplotlib nbsphinx numexpr -openpyxl +openpyxl=2.5.5 pyarrow pymysql tables @@ -26,4 +26,4 @@ sqlalchemy xarray xlrd xlsxwriter -xlwt \ No newline at end of file +xlwt diff --git a/ci/run_circle.sh b/ci/run_circle.sh index 435985bd42148..fc2a8b849a354 100755 --- a/ci/run_circle.sh +++ b/ci/run_circle.sh @@ -6,4 +6,4 @@ export PATH="$MINICONDA_DIR/bin:$PATH" source activate pandas echo "pytest --strict --junitxml=$CIRCLE_TEST_REPORTS/reports/junit.xml $@ pandas" -pytest --strict --junitxml=$CIRCLE_TEST_REPORTS/reports/junit.xml $@ pandas +pytest --strict --color=no --junitxml=$CIRCLE_TEST_REPORTS/reports/junit.xml $@ pandas diff --git a/ci/travis-27-locale.yaml b/ci/travis-27-locale.yaml index 1312c1296d46a..eacae4630edeb 100644 --- a/ci/travis-27-locale.yaml +++ b/ci/travis-27-locale.yaml @@ -7,7 +7,7 @@ dependencies: - cython=0.24 - lxml - matplotlib=1.4.3 - - numpy=1.9.2 + - numpy=1.9.3 - openpyxl=2.4.0 - python-dateutil - python-blosc diff --git a/ci/travis-27.yaml b/ci/travis-27.yaml index 22b993a2da886..26a520a16a4cc 100644 --- a/ci/travis-27.yaml +++ b/ci/travis-27.yaml @@ -27,6 
+27,7 @@ dependencies: - PyCrypto - pymysql=0.6.3 - pytables + - blosc=1.14.3 - python-blosc - python-dateutil=2.5.0 - python=2.7* diff --git a/ci/travis-35-osx.yaml b/ci/travis-35-osx.yaml index e74abac4c9775..5722d91781999 100644 --- a/ci/travis-35-osx.yaml +++ b/ci/travis-35-osx.yaml @@ -12,7 +12,7 @@ dependencies: - nomkl - numexpr - numpy=1.10.4 - - openpyxl + - openpyxl=2.5.5 - pytables - python=3.5* - pytz diff --git a/ci/travis-36-doc.yaml b/ci/travis-36-doc.yaml index 8705b82412e7c..05ff26020ac7d 100644 --- a/ci/travis-36-doc.yaml +++ b/ci/travis-36-doc.yaml @@ -21,7 +21,7 @@ dependencies: - notebook - numexpr - numpy=1.13* - - openpyxl + - openpyxl=2.5.5 - pandoc - pyqt - pytables diff --git a/ci/travis-36-slow.yaml b/ci/travis-36-slow.yaml index 6c475dc48723c..ae6353216cc2d 100644 --- a/ci/travis-36-slow.yaml +++ b/ci/travis-36-slow.yaml @@ -10,7 +10,7 @@ dependencies: - matplotlib - numexpr - numpy - - openpyxl + - openpyxl=2.5.5 - patsy - psycopg2 - pymysql diff --git a/ci/travis-36.yaml b/ci/travis-36.yaml index 006276ba1a65f..83f963b9d9b6d 100644 --- a/ci/travis-36.yaml +++ b/ci/travis-36.yaml @@ -17,7 +17,7 @@ dependencies: - nomkl - numexpr - numpy - - openpyxl + - openpyxl=2.5.5 - psycopg2 - pyarrow - pymysql diff --git a/circle.yml b/circle.yml deleted file mode 100644 index 66415defba6fe..0000000000000 --- a/circle.yml +++ /dev/null @@ -1,38 +0,0 @@ -machine: - environment: - # these are globally set - MINICONDA_DIR: /home/ubuntu/miniconda3 - - -database: - override: - - ./ci/install_db_circle.sh - - -checkout: - post: - # since circleci does a shallow fetch - # we need to populate our tags - - git fetch --depth=1000 - - -dependencies: - override: - - > - case $CIRCLE_NODE_INDEX in - 0) - sudo apt-get install language-pack-it && ./ci/install_circle.sh JOB="2.7_COMPAT" ENV_FILE="ci/circle-27-compat.yaml" LOCALE_OVERRIDE="it_IT.UTF-8" ;; - 1) - sudo apt-get install language-pack-zh-hans && ./ci/install_circle.sh JOB="3.6_LOCALE" ENV_FILE="ci/circle-36-locale.yaml" LOCALE_OVERRIDE="zh_CN.UTF-8" ;; - 2) - sudo apt-get install language-pack-zh-hans && ./ci/install_circle.sh JOB="3.6_LOCALE_SLOW" ENV_FILE="ci/circle-36-locale_slow.yaml" LOCALE_OVERRIDE="zh_CN.UTF-8" ;; - 3) - ./ci/install_circle.sh JOB="3.5_ASCII" ENV_FILE="ci/circle-35-ascii.yaml" LOCALE_OVERRIDE="C" ;; - esac - - ./ci/show_circle.sh - - -test: - override: - - case $CIRCLE_NODE_INDEX in 0) ./ci/run_circle.sh --skip-slow --skip-network ;; 1) ./ci/run_circle.sh --only-slow --skip-network ;; 2) ./ci/run_circle.sh --skip-slow --skip-network ;; 3) ./ci/run_circle.sh --skip-slow --skip-network ;; esac: - parallel: true diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index 056924f2c6663..743cbc107cce5 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -1,5 +1,6 @@ import locale import calendar +import unicodedata import pytest @@ -7,7 +8,7 @@ import pandas as pd import pandas.util.testing as tm from pandas import (Index, DatetimeIndex, datetime, offsets, - date_range, Timestamp) + date_range, Timestamp, compat) class TestTimeSeries(object): @@ -284,10 +285,24 @@ def test_datetime_name_accessors(self, time_locale): dti = DatetimeIndex(freq='M', start='2012', end='2013') result = dti.month_name(locale=time_locale) expected = Index([month.capitalize() for month in expected_months]) + + # work around different normalization schemes + # https://github.com/pandas-dev/pandas/issues/22342 + if not compat.PY2: + 
result = result.str.normalize("NFD") + expected = expected.str.normalize("NFD") + tm.assert_index_equal(result, expected) + for date, expected in zip(dti, expected_months): result = date.month_name(locale=time_locale) - assert result == expected.capitalize() + expected = expected.capitalize() + + if not compat.PY2: + result = unicodedata.normalize("NFD", result) + expected = unicodedata.normalize("NFD", expected) + + assert result == expected dti = dti.append(DatetimeIndex([pd.NaT])) assert np.isnan(dti.month_name(locale=time_locale)[-1]) diff --git a/pandas/tests/io/json/test_compression.py b/pandas/tests/io/json/test_compression.py index 05ceace20f5a4..1b9cbc57865d2 100644 --- a/pandas/tests/io/json/test_compression.py +++ b/pandas/tests/io/json/test_compression.py @@ -2,6 +2,7 @@ import pandas as pd import pandas.util.testing as tm +import pandas.util._test_decorators as td from pandas.util.testing import assert_frame_equal, assert_raises_regex @@ -31,6 +32,7 @@ def test_read_zipped_json(datapath): assert_frame_equal(uncompressed_df, compressed_df) +@td.skip_if_not_us_locale def test_with_s3_url(compression): boto3 = pytest.importorskip('boto3') pytest.importorskip('s3fs') diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index bcbac4400c953..b5a2be87de1c4 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -15,6 +15,7 @@ assert_series_equal, network, ensure_clean, assert_index_equal) import pandas.util.testing as tm +import pandas.util._test_decorators as td _seriesd = tm.getSeriesData() _tsd = tm.getTimeSeriesData() @@ -1040,6 +1041,7 @@ def test_read_inline_jsonl(self): expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b']) assert_frame_equal(result, expected) + @td.skip_if_not_us_locale def test_read_s3_jsonl(self, s3_resource): # GH17200 diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py index e2243b8087a5b..72d2c5fd8d18f 100644 --- a/pandas/tests/io/parser/test_network.py +++ b/pandas/tests/io/parser/test_network.py @@ -55,10 +55,12 @@ def tips_df(datapath): @pytest.mark.usefixtures("s3_resource") +@td.skip_if_not_us_locale() class TestS3(object): def test_parse_public_s3_bucket(self, tips_df): pytest.importorskip('s3fs') + # more of an integration test due to the not-public contents portion # can probably mock this though.
for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 4e2b2af0ebfe7..20f403e71fd36 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -576,6 +576,7 @@ def test_read_from_http_url(self, ext): tm.assert_frame_equal(url_table, local_table) @td.skip_if_no('s3fs') + @td.skip_if_not_us_locale def test_read_from_s3_url(self, ext): boto3 = pytest.importorskip('boto3') moto = pytest.importorskip('moto') diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index 4689c7bea626f..e829506e95b53 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -5,6 +5,7 @@ import dateutil import calendar import locale +import unicodedata import numpy as np from dateutil.tz import tzutc @@ -20,7 +21,7 @@ from pandas._libs.tslibs.timezones import get_timezone, dateutil_gettz as gettz from pandas.errors import OutOfBoundsDatetime -from pandas.compat import long, PY3 +from pandas.compat import long, PY3, PY2 from pandas.compat.numpy import np_datetime64_compat from pandas import Timestamp, Period, Timedelta, NaT @@ -116,8 +117,21 @@ def test_names(self, data, time_locale): expected_day = calendar.day_name[0].capitalize() expected_month = calendar.month_name[8].capitalize() - assert data.day_name(time_locale) == expected_day - assert data.month_name(time_locale) == expected_month + result_day = data.day_name(time_locale) + result_month = data.month_name(time_locale) + + # Work around https://github.com/pandas-dev/pandas/issues/22342 + # different normalizations + + if not PY2: + expected_day = unicodedata.normalize("NFD", expected_day) + expected_month = unicodedata.normalize("NFD", expected_month) + + result_day = unicodedata.normalize("NFD", result_day,) + result_month = unicodedata.normalize("NFD", result_month) + + assert result_day == expected_day + assert result_month == expected_month # Test NaT nan_ts = Timestamp(NaT) diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index 47798d0ddd7f5..5e924ac5c8894 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -3,6 +3,7 @@ import locale import calendar +import unicodedata import pytest from datetime import datetime, date @@ -13,7 +14,8 @@ from pandas.core.dtypes.common import is_integer_dtype, is_list_like from pandas import (Index, Series, DataFrame, bdate_range, date_range, period_range, timedelta_range, - PeriodIndex, DatetimeIndex, TimedeltaIndex) + PeriodIndex, DatetimeIndex, TimedeltaIndex, + compat) import pandas.core.common as com from pandas.util.testing import assert_series_equal @@ -309,10 +311,24 @@ def test_dt_accessor_datetime_name_accessors(self, time_locale): s = Series(DatetimeIndex(freq='M', start='2012', end='2013')) result = s.dt.month_name(locale=time_locale) expected = Series([month.capitalize() for month in expected_months]) + + # work around https://github.com/pandas-dev/pandas/issues/22342 + if not compat.PY2: + result = result.str.normalize("NFD") + expected = expected.str.normalize("NFD") + tm.assert_series_equal(result, expected) + for s_date, expected in zip(s, expected_months): result = s_date.month_name(locale=time_locale) - assert result == expected.capitalize() + expected = expected.capitalize() + + if not compat.PY2: + result = unicodedata.normalize("NFD", result) + expected = 
unicodedata.normalize("NFD", expected) + + assert result == expected + s = s.append(Series([pd.NaT])) assert np.isnan(s.dt.month_name(locale=time_locale).iloc[-1]) diff --git a/pandas/tests/util/test_util.py b/pandas/tests/util/test_util.py index 145be7f85b193..c049dfc874940 100644 --- a/pandas/tests/util/test_util.py +++ b/pandas/tests/util/test_util.py @@ -433,6 +433,26 @@ def teardown_class(cls): del cls.locales del cls.current_locale + def test_can_set_locale_valid_set(self): + # Setting the default locale should return True + assert tm.can_set_locale('') is True + + def test_can_set_locale_invalid_set(self): + # Setting an invalid locale should return False + assert tm.can_set_locale('non-existent_locale') is False + + def test_can_set_locale_invalid_get(self, monkeypatch): + # In some cases, an invalid locale can be set, + # but a subsequent getlocale() raises a ValueError + # See GH 22129 + + def mockgetlocale(): + raise ValueError() + + with monkeypatch.context() as m: + m.setattr(locale, 'getlocale', mockgetlocale) + assert tm.can_set_locale('') is False + def test_get_locales(self): # all systems should have at least a single locale assert len(tm.get_locales()) > 0 @@ -466,7 +486,7 @@ def test_set_locale(self): enc = codecs.lookup(enc).name new_locale = lang, enc - if not tm._can_set_locale(new_locale): + if not tm.can_set_locale(new_locale): with pytest.raises(locale.Error): with tm.set_locale(new_locale): pass diff --git a/pandas/util/testing.py b/pandas/util/testing.py index b7edbff00a4b9..bb79c25126fab 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -478,6 +478,8 @@ def set_locale(new_locale, lc_var=locale.LC_ALL): A string of the form <language_country>.<encoding>. For example to set the current locale to US English with a UTF8 encoding, you would pass "en_US.UTF-8". + lc_var : int, default `locale.LC_ALL` + The category of the locale being set.
Returns ------- - isvalid : bool + is_valid : bool Whether the passed locale can be set """ try: - with set_locale(lc): + with set_locale(lc, lc_var=lc_var): pass - except locale.Error: # horrible name for a Exception subclass + except (ValueError, + locale.Error): # horrible name for a Exception subclass return False else: return True @@ -546,7 +548,7 @@ def _valid_locales(locales, normalize): else: normalizer = lambda x: x.strip() - return list(filter(_can_set_locale, map(normalizer, locales))) + return list(filter(can_set_locale, map(normalizer, locales))) # ----------------------------------------------------------------------------- # Stdout / stderr decorators From af7b0ba461a5b81733afdc7fc816a869b798093d Mon Sep 17 00:00:00 2001 From: gfyoung Date: Tue, 11 Sep 2018 14:45:25 -0700 Subject: [PATCH 55/55] BUG: NaN should have pct rank of NaN (#22634) Backport of gh-22600. --- doc/source/whatsnew/v0.23.5.txt | 3 +++ pandas/_libs/groupby_helper.pxi.in | 7 ++++++- pandas/tests/groupby/test_rank.py | 19 ++++++++++++++++++- 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.23.5.txt b/doc/source/whatsnew/v0.23.5.txt index 304ab12752ad4..f69e38e7fdd50 100644 --- a/doc/source/whatsnew/v0.23.5.txt +++ b/doc/source/whatsnew/v0.23.5.txt @@ -20,6 +20,9 @@ and bug fixes. We recommend that all users upgrade to this version. Fixed Regressions ~~~~~~~~~~~~~~~~~ +- Calling :meth:`DataFrameGroupBy.rank` and :meth:`SeriesGroupBy.rank` with empty groups + and ``pct=True`` was raising a ``ZeroDivisionError`` due to `c1068d9 + `_ (:issue:`22519`) - - diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in index b3e9b7c9e69ee..d7885e112a7e0 100644 --- a/pandas/_libs/groupby_helper.pxi.in +++ b/pandas/_libs/groupby_helper.pxi.in @@ -587,7 +587,12 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out, if pct: for i in range(N): - out[i, 0] = out[i, 0] / grp_sizes[i, 0] + # We don't include NaN values in percentage + # rankings, so we assign them percentages of NaN. + if out[i, 0] != out[i, 0] or out[i, 0] == NAN: + out[i, 0] = NAN + else: + out[i, 0] = out[i, 0] / grp_sizes[i, 0] {{endif}} {{endfor}} diff --git a/pandas/tests/groupby/test_rank.py b/pandas/tests/groupby/test_rank.py index 203c3c73bec94..d978e144e5013 100644 --- a/pandas/tests/groupby/test_rank.py +++ b/pandas/tests/groupby/test_rank.py @@ -1,7 +1,7 @@ import pytest import numpy as np import pandas as pd -from pandas import DataFrame, concat +from pandas import DataFrame, Series, concat from pandas.util import testing as tm @@ -252,3 +252,20 @@ def test_rank_object_raises(ties_method, ascending, na_option, df.groupby('key').rank(method=ties_method, ascending=ascending, na_option=na_option, pct=pct) + + +def test_rank_empty_group(): + # see gh-22519 + column = "A" + df = DataFrame({ + "A": [0, 1, 0], + "B": [1., np.nan, 2.] + }) + + result = df.groupby(column).B.rank(pct=True) + expected = Series([0.5, np.nan, 1.0], name="B") + tm.assert_series_equal(result, expected) + + result = df.groupby(column).rank(pct=True) + expected = DataFrame({"B": [0.5, np.nan, 1.0]}) + tm.assert_frame_equal(result, expected)
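
Note on the groupby_helper change above: the final percentage pass now skips NaN ranks instead of dividing them by a group size that can be zero for an all-NaN group (the ZeroDivisionError in gh-22519). A plain-Python sketch of that last pass — an illustration of the logic only, not the actual Cython kernel; pct_from_ranks is a made-up name:

    import numpy as np

    def pct_from_ranks(ranks, grp_sizes):
        # ranks: within-group ranks, NaN where the input value was NaN
        # grp_sizes: per-row count of non-NaN values in that row's group
        out = np.empty(len(ranks), dtype="float64")
        for i, (rank, size) in enumerate(zip(ranks, grp_sizes)):
            if np.isnan(rank):
                # NaN values are excluded from percentage rankings, so
                # they get NaN rather than rank / size, which would
                # divide by zero when the whole group is NaN.
                out[i] = np.nan
            else:
                out[i] = rank / size
        return out

    # Mirrors test_rank_empty_group: B = [1.0, NaN, 2.0] grouped by
    # A = [0, 1, 0] gives ranks [1.0, NaN, 2.0] and group sizes [2, 0, 2].
    print(pct_from_ranks(np.array([1.0, np.nan, 2.0]), np.array([2, 0, 2])))
    # -> [0.5 nan 1. ]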