From 7421077c79de53efb9d315bb972d3fc849b5516b Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 3 Aug 2025 09:29:01 +0200 Subject: [PATCH] API: timestamp resolution inference: default to microseconds when possible --- pandas/_libs/tslib.pyx | 2 +- pandas/_libs/tslibs/conversion.pyx | 6 +- pandas/_libs/tslibs/dtypes.pxd | 8 +- pandas/_libs/tslibs/dtypes.pyx | 40 +++++++ pandas/_libs/tslibs/strptime.pyx | 13 +-- pandas/tests/tools/test_to_datetime.py | 153 ++++++++++++++++++------- 6 files changed, 171 insertions(+), 51 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 3c5854602df53..63a64ffcf893c 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -355,7 +355,7 @@ cpdef array_to_datetime( iresult[i] = parse_pydatetime(val, &dts, creso=creso) elif PyDate_Check(val): - item_reso = NPY_DATETIMEUNIT.NPY_FR_s + item_reso = NPY_DATETIMEUNIT.NPY_FR_us state.update_creso(item_reso) if infer_reso: creso = state.creso diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 2a080bcb19ae9..a16d14040d83a 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -33,6 +33,7 @@ from pandas._libs.missing cimport checknull_with_nat_and_na from pandas._libs.tslibs.dtypes cimport ( abbrev_to_npy_unit, get_supported_reso, + get_supported_reso_for_dts, npy_unit_to_attrname, periods_per_second, ) @@ -507,6 +508,9 @@ cdef _TSObject convert_datetime_to_tsobject( if nanos: obj.dts.ps = nanos * 1000 + reso = get_supported_reso_for_dts(reso, &obj.dts) + obj.creso = reso + try: obj.value = npy_datetimestruct_to_datetime(reso, &obj.dts) except OverflowError as err: @@ -622,7 +626,7 @@ cdef _TSObject convert_str_to_tsobject(str ts, tzinfo tz, &out_tzoffset, False ) if not string_to_dts_failed: - reso = get_supported_reso(out_bestunit) + reso = get_supported_reso_for_dts(out_bestunit, &dts) check_dts_bounds(&dts, reso) obj = _TSObject() obj.dts = dts diff --git 
a/pandas/_libs/tslibs/dtypes.pxd b/pandas/_libs/tslibs/dtypes.pxd index d8c536a34bc04..e8e8c6a477773 100644 --- a/pandas/_libs/tslibs/dtypes.pxd +++ b/pandas/_libs/tslibs/dtypes.pxd @@ -1,6 +1,9 @@ from numpy cimport int64_t -from pandas._libs.tslibs.np_datetime cimport NPY_DATETIMEUNIT +from pandas._libs.tslibs.np_datetime cimport ( + NPY_DATETIMEUNIT, + npy_datetimestruct, +) cdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit) @@ -9,6 +12,9 @@ cdef NPY_DATETIMEUNIT freq_group_code_to_npy_unit(int freq) noexcept nogil cpdef int64_t periods_per_day(NPY_DATETIMEUNIT reso=*) except? -1 cpdef int64_t periods_per_second(NPY_DATETIMEUNIT reso) except? -1 cdef NPY_DATETIMEUNIT get_supported_reso(NPY_DATETIMEUNIT reso) +cdef NPY_DATETIMEUNIT get_supported_reso_for_dts( + NPY_DATETIMEUNIT reso, npy_datetimestruct* dts +) cdef bint is_supported_unit(NPY_DATETIMEUNIT reso) cdef dict c_OFFSET_TO_PERIOD_FREQSTR diff --git a/pandas/_libs/tslibs/dtypes.pyx b/pandas/_libs/tslibs/dtypes.pyx index 4100f3d90e817..8913ff32720e8 100644 --- a/pandas/_libs/tslibs/dtypes.pyx +++ b/pandas/_libs/tslibs/dtypes.pyx @@ -2,11 +2,21 @@ # originals from enum import Enum +import numpy as np + +from cpython.object cimport ( + Py_GE, + Py_LE, +) + from pandas._libs.tslibs.ccalendar cimport c_MONTH_NUMBERS from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, + cmp_dtstructs, get_conversion_factor, import_pandas_datetime, + npy_datetimestruct, + pandas_datetime_to_datetimestruct, ) import_pandas_datetime() @@ -504,6 +514,36 @@ cdef NPY_DATETIMEUNIT get_supported_reso(NPY_DATETIMEUNIT reso): return reso +cdef npy_datetimestruct dts_us_min, dts_us_max +pandas_datetime_to_datetimestruct( + np.iinfo(np.int64).min + 1, NPY_DATETIMEUNIT.NPY_FR_us, &dts_us_min +) +pandas_datetime_to_datetimestruct( + np.iinfo(np.int64).max, NPY_DATETIMEUNIT.NPY_FR_us, &dts_us_max +) + + +cdef NPY_DATETIMEUNIT get_supported_reso_for_dts( + NPY_DATETIMEUNIT reso, npy_datetimestruct* dts +): + # Similar as above, 
but taking the actual datetime value into account, + # defaulting to 'us' if possible. + if reso == NPY_DATETIMEUNIT.NPY_FR_GENERIC: + return NPY_DATETIMEUNIT.NPY_FR_ns + if reso < NPY_DATETIMEUNIT.NPY_FR_us: + if ( + cmp_dtstructs(dts, &dts_us_min, Py_GE) + and cmp_dtstructs(dts, &dts_us_max, Py_LE) + ): + return NPY_DATETIMEUNIT.NPY_FR_us + else: + # TODO still distinguish between ms and s? + return NPY_DATETIMEUNIT.NPY_FR_s + elif reso > NPY_DATETIMEUNIT.NPY_FR_ns: + return NPY_DATETIMEUNIT.NPY_FR_ns + return reso + + cdef bint is_supported_unit(NPY_DATETIMEUNIT reso): return ( reso == NPY_DATETIMEUNIT.NPY_FR_ns diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index b443aa7bede22..84c2a449cdb06 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -53,6 +53,7 @@ from pandas._libs.tslibs.conversion cimport ( ) from pandas._libs.tslibs.dtypes cimport ( get_supported_reso, + get_supported_reso_for_dts, npy_unit_to_abbrev, npy_unit_to_attrname, ) @@ -421,7 +422,7 @@ def array_strptime( continue elif PyDate_Check(val): state.found_other = True - item_reso = NPY_DATETIMEUNIT.NPY_FR_s + item_reso = NPY_DATETIMEUNIT.NPY_FR_us state.update_creso(item_reso) if infer_reso: creso = state.creso @@ -460,7 +461,7 @@ def array_strptime( if string_to_dts_succeeded: # No error reported by string_to_dts, pick back up # where we left off - item_reso = get_supported_reso(out_bestunit) + item_reso = get_supported_reso_for_dts(out_bestunit, &dts) state.update_creso(item_reso) if infer_reso: creso = state.creso @@ -622,7 +623,7 @@ cdef tzinfo _parse_with_format( f"time data \"{val}\" doesn't match format \"{fmt}\"" ) - item_reso[0] = NPY_DATETIMEUNIT.NPY_FR_s + item_reso[0] = NPY_DATETIMEUNIT.NPY_FR_us iso_year = -1 year = 1900 @@ -710,11 +711,7 @@ cdef tzinfo _parse_with_format( elif parse_code == 10: # e.g. 
val='10:10:10.100'; fmt='%H:%M:%S.%f' s = found_dict["f"] - if len(s) <= 3: - item_reso[0] = NPY_DATETIMEUNIT.NPY_FR_ms - elif len(s) <= 6: - item_reso[0] = NPY_DATETIMEUNIT.NPY_FR_us - else: + if len(s) > 6: item_reso[0] = NPY_FR_ns # Pad to always return nanoseconds s += "0" * (9 - len(s)) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index b02fab70fb825..8d7b43d1870be 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -119,7 +119,7 @@ def test_to_datetime_format_YYYYMMDD_with_nat(self, cache): expected = Series( [Timestamp("19801222"), Timestamp("19801222")] + [Timestamp("19810105")] * 5, - dtype="M8[s]", + dtype="M8[us]", ) expected[2] = np.nan ser[2] = np.nan @@ -146,7 +146,7 @@ def test_to_datetime_format_YYYYMM_with_nat(self, cache): expected = Series( [Timestamp("19801201"), Timestamp("19801201")] + [Timestamp("19810101")] * 5, - dtype="M8[s]", + dtype="M8[us]", ) expected[2] = np.nan ser[2] = np.nan @@ -160,7 +160,7 @@ def test_to_datetime_format_YYYYMMDD_oob_for_ns(self, cache): result = to_datetime(ser, format="%Y%m%d", errors="raise", cache=cache) expected = Series( np.array(["2012-12-31", "2014-12-31", "9999-12-31"], dtype="M8[s]"), - dtype="M8[s]", + dtype="M8[us]", ) tm.assert_series_equal(result, expected) @@ -169,7 +169,7 @@ def test_to_datetime_format_YYYYMMDD_coercion(self, cache): # GH 7930 ser = Series([20121231, 20141231, 999999999999999999999999999991231]) result = to_datetime(ser, format="%Y%m%d", errors="coerce", cache=cache) - expected = Series(["20121231", "20141231", "NaT"], dtype="M8[s]") + expected = Series(["20121231", "20141231", "NaT"], dtype="M8[us]") tm.assert_series_equal(result, expected) @pytest.mark.parametrize( @@ -579,7 +579,7 @@ def test_to_datetime_mixed_date_and_string(self, format): # https://github.com/pandas-dev/pandas/issues/50108 d1 = date(2020, 1, 2) res = to_datetime(["2020-01-01", d1], format=format) - expected = 
DatetimeIndex(["2020-01-01", "2020-01-02"], dtype="M8[s]") + expected = DatetimeIndex(["2020-01-01", "2020-01-02"], dtype="M8[us]") tm.assert_index_equal(res, expected) @pytest.mark.parametrize( @@ -641,8 +641,6 @@ def test_to_datetime_mixed_datetime_and_string_with_format( ts1 = constructor(args[0]) ts2 = args[1] result = to_datetime([ts1, ts2], format=fmt, utc=utc) - if constructor is Timestamp: - expected = expected.as_unit("s") tm.assert_index_equal(result, expected) @pytest.mark.parametrize( @@ -714,7 +712,7 @@ def test_to_datetime_mixed_offsets_with_none_tz_utc_false_removed( "%Y-%m-%d %H:%M:%S%z", DatetimeIndex( ["2000-01-01 08:00:00+00:00", "2000-01-02 00:00:00+00:00", "NaT"], - dtype="datetime64[s, UTC]", + dtype="datetime64[us, UTC]", ), id="ISO8601, UTC", ), @@ -722,7 +720,7 @@ def test_to_datetime_mixed_offsets_with_none_tz_utc_false_removed( "%Y-%d-%m %H:%M:%S%z", DatetimeIndex( ["2000-01-01 08:00:00+00:00", "2000-02-01 00:00:00+00:00", "NaT"], - dtype="datetime64[s, UTC]", + dtype="datetime64[us, UTC]", ), id="non-ISO8601, UTC", ), @@ -1157,7 +1155,7 @@ def test_to_datetime_tz(self, cache): result = to_datetime(arr, cache=cache) expected = DatetimeIndex( ["2013-01-01 13:00:00", "2013-01-02 14:00:00"], tz="US/Pacific" - ).as_unit("s") + ).as_unit("us") tm.assert_index_equal(result, expected) def test_to_datetime_tz_mixed(self, cache): @@ -1176,7 +1174,7 @@ def test_to_datetime_tz_mixed(self, cache): result = to_datetime(arr, cache=cache, errors="coerce") expected = DatetimeIndex( - ["2013-01-01 13:00:00-08:00", "NaT"], dtype="datetime64[s, US/Pacific]" + ["2013-01-01 13:00:00-08:00", "NaT"], dtype="datetime64[us, US/Pacific]" ) tm.assert_index_equal(result, expected) @@ -1469,17 +1467,15 @@ def test_to_datetime_cache_scalar(self): assert result == expected @pytest.mark.parametrize( - "datetimelikes,expected_values,exp_unit", + "datetimelikes,expected_values", ( ( (None, np.nan) + (NaT,) * start_caching_at, (NaT,) * (start_caching_at + 2), - "s", ), ( 
(None, Timestamp("2012-07-26")) + (NaT,) * start_caching_at, (NaT, Timestamp("2012-07-26")) + (NaT,) * start_caching_at, - "s", ), ( (None,) @@ -1487,12 +1483,11 @@ def test_to_datetime_cache_scalar(self): + ("2012 July 26", Timestamp("2012-07-26")), (NaT,) * (start_caching_at + 1) + (Timestamp("2012-07-26"), Timestamp("2012-07-26")), - "s", ), ), ) def test_convert_object_to_datetime_with_cache( - self, datetimelikes, expected_values, exp_unit + self, datetimelikes, expected_values ): # GH#39882 ser = Series( @@ -1500,10 +1495,10 @@ def test_convert_object_to_datetime_with_cache( dtype="object", ) result_series = to_datetime(ser, errors="coerce") - expected_series = Series( - expected_values, - dtype=f"datetime64[{exp_unit}]", - ) + expected_series = Series(expected_values, dtype="datetime64[us]") + if expected_series.isna().all(): + # TODO should this also be `us`? + expected_series = expected_series.astype("datetime64[s]") tm.assert_series_equal(result_series, expected_series) @pytest.mark.parametrize( @@ -1578,13 +1573,15 @@ def test_to_datetime_coerce_oob(self, string_arg, format, outofbounds): format.startswith("%B") ^ outofbounds.startswith("J") ): # the strings don't match the given format, so they raise and we coerce - expected = DatetimeIndex([datetime(2018, 3, 1), NaT], dtype="M8[s]") + expected = DatetimeIndex([datetime(2018, 3, 1), NaT], dtype="M8[us]") elif isinstance(outofbounds, datetime): expected = DatetimeIndex( [datetime(2018, 3, 1), outofbounds], dtype="M8[us]" ) else: - expected = DatetimeIndex([datetime(2018, 3, 1), outofbounds], dtype="M8[s]") + expected = DatetimeIndex( + [datetime(2018, 3, 1), outofbounds], dtype="M8[us]" + ) tm.assert_index_equal(result, expected) def test_to_datetime_malformed_no_raise(self): @@ -1644,7 +1641,7 @@ def test_iso_8601_strings_with_different_offsets_utc(self): result = to_datetime(ts_strings, utc=True) expected = DatetimeIndex( [Timestamp(2015, 11, 18, 10), Timestamp(2015, 11, 18, 10), NaT], tz="UTC" - 
).as_unit("s") + ).as_unit("us") tm.assert_index_equal(result, expected) def test_mixed_offsets_with_native_datetime_utc_false_raises(self): @@ -1670,7 +1667,7 @@ def test_non_iso_strings_with_tz_offset(self): result = to_datetime(["March 1, 2018 12:00:00+0400"] * 2) expected = DatetimeIndex( [datetime(2018, 3, 1, 12, tzinfo=timezone(timedelta(minutes=240)))] * 2 - ).as_unit("s") + ).as_unit("us") tm.assert_index_equal(result, expected) @pytest.mark.parametrize( @@ -1693,7 +1690,7 @@ def test_to_datetime_with_format_out_of_bounds(self, dt_str): # GH 9107 res = to_datetime(dt_str, format="%Y%m%d") dtobj = datetime.strptime(dt_str, "%Y%m%d") - expected = Timestamp(dtobj).as_unit("s") + expected = Timestamp(dtobj).as_unit("us") assert res == expected assert res.unit == expected.unit @@ -2214,7 +2211,7 @@ def test_dataframe_utc_true(self): df = DataFrame({"year": [2015, 2016], "month": [2, 3], "day": [4, 5]}) result = to_datetime(df, utc=True) expected = Series( - np.array(["2015-02-04", "2016-03-05"], dtype="datetime64[s]") + np.array(["2015-02-04", "2016-03-05"], dtype="datetime64[us]") ).dt.tz_localize("UTC") tm.assert_series_equal(result, expected) @@ -2420,7 +2417,7 @@ def test_to_datetime_with_space_in_series(self, cache): result_coerce = to_datetime(ser, errors="coerce", cache=cache) expected_coerce = Series( [datetime(2006, 10, 18), datetime(2008, 10, 18), NaT] - ).dt.as_unit("s") + ).dt.as_unit("us") tm.assert_series_equal(result_coerce, expected_coerce) @td.skip_if_not_us_locale @@ -2541,7 +2538,7 @@ def test_string_na_nat_conversion(self, cache): strings = np.array(["1/1/2000", "1/2/2000", np.nan, "1/4/2000"], dtype=object) - expected = np.empty(4, dtype="M8[s]") + expected = np.empty(4, dtype="M8[us]") for i, val in enumerate(strings): if isna(val): expected[i] = iNaT @@ -2586,7 +2583,7 @@ def test_string_na_nat_conversion_with_name(self, cache): result = to_datetime(series, cache=cache) dresult = to_datetime(dseries, cache=cache) - expected = 
Series(np.empty(5, dtype="M8[s]"), index=idx) + expected = Series(np.empty(5, dtype="M8[us]"), index=idx) for i in range(5): x = series.iloc[i] if isna(x): @@ -2626,7 +2623,7 @@ def test_dayfirst(self, cache): arr = ["10/02/2014", "11/02/2014", "12/02/2014"] expected = DatetimeIndex( [datetime(2014, 2, 10), datetime(2014, 2, 11), datetime(2014, 2, 12)] - ).as_unit("s") + ).as_unit("us") idx1 = DatetimeIndex(arr, dayfirst=True) idx2 = DatetimeIndex(np.array(arr), dayfirst=True) idx3 = to_datetime(arr, dayfirst=True, cache=cache) @@ -2650,7 +2647,7 @@ def test_dayfirst_warnings_valid_input(self): # CASE 1: valid input arr = ["31/12/2014", "10/03/2011"] expected = DatetimeIndex( - ["2014-12-31", "2011-03-10"], dtype="datetime64[s]", freq=None + ["2014-12-31", "2011-03-10"], dtype="datetime64[us]", freq=None ) # A. dayfirst arg correct, no warning @@ -2755,7 +2752,7 @@ def test_to_datetime_consistent_format(self, cache): ser = Series(np.array(data)) result = to_datetime(ser, cache=cache) expected = Series( - ["2011-01-01", "2011-02-01", "2011-03-01"], dtype="datetime64[s]" + ["2011-01-01", "2011-02-01", "2011-03-01"], dtype="datetime64[us]" ) tm.assert_series_equal(result, expected) @@ -2767,7 +2764,9 @@ def test_to_datetime_series_with_nans(self, cache): ) ) result = to_datetime(ser, cache=cache) - expected = Series(["2011-01-01", NaT, "2011-01-03", NaT], dtype="datetime64[s]") + expected = Series( + ["2011-01-01", NaT, "2011-01-03", NaT], dtype="datetime64[us]" + ) tm.assert_series_equal(result, expected) def test_to_datetime_series_start_with_nans(self, cache): @@ -2786,7 +2785,7 @@ def test_to_datetime_series_start_with_nans(self, cache): result = to_datetime(ser, cache=cache) expected = Series( - [NaT, NaT, "2011-01-01", "2011-01-02", "2011-01-03"], dtype="datetime64[s]" + [NaT, NaT, "2011-01-01", "2011-01-02", "2011-01-03"], dtype="datetime64[us]" ) tm.assert_series_equal(result, expected) @@ -2800,7 +2799,7 @@ def test_infer_datetime_format_tz_name(self, 
tz_name, offset): result = to_datetime(ser) tz = timezone(timedelta(minutes=offset)) expected = Series([Timestamp("2019-02-02 08:07:13").tz_localize(tz)]) - expected = expected.dt.as_unit("s") + expected = expected.dt.as_unit("us") tm.assert_series_equal(result, expected) @pytest.mark.parametrize( @@ -2964,9 +2963,9 @@ def test_parsers(self, date_str, expected, cache): reso = { "nanosecond": "ns", "microsecond": "us", - "millisecond": "ms", - "second": "s", - }.get(reso_attrname, "s") + "millisecond": "us", + "second": "us", + }.get(reso_attrname, "us") result2 = to_datetime(date_str, yearfirst=yearfirst) result3 = to_datetime([date_str], yearfirst=yearfirst) # result5 is used below @@ -3412,7 +3411,7 @@ def test_empty_string_datetime(errors, args, format): # coerce empty string to pd.NaT result = to_datetime(td, format=format, errors=errors) - expected = Series(["2016-03-24", "2016-03-25", NaT], dtype="datetime64[s]") + expected = Series(["2016-03-24", "2016-03-25", NaT], dtype="datetime64[us]") tm.assert_series_equal(expected, result) @@ -3597,7 +3596,7 @@ def test_to_datetime_with_empty_str_utc_false_format_mixed(): # GH 50887 vals = ["2020-01-01 00:00+00:00", ""] result = to_datetime(vals, format="mixed") - expected = Index([Timestamp("2020-01-01 00:00+00:00"), "NaT"], dtype="M8[s, UTC]") + expected = Index([Timestamp("2020-01-01 00:00+00:00"), "NaT"], dtype="M8[us, UTC]") tm.assert_index_equal(result, expected) # Check that a couple of other similar paths work the same way @@ -3770,3 +3769,77 @@ def test_to_datetime_wrapped_datetime64_ps(): ["1970-01-01 00:00:01.901901901"], dtype="datetime64[ns]", freq=None ) tm.assert_index_equal(result, expected) + + +class TestToDatetimeInferUnit: + @pytest.mark.parametrize( + "hour,unit", + [ + ("", "us"), + ("T09:00", "us"), + ("T09:00:00", "us"), + ("T09:00:00.123", "us"), + ("T09:00:00.123456", "us"), + ("T09:00:00.123456789", "ns"), + ("T09:00:00.123456789123", "ns"), + ], + ) + def test_strings(self, hour, unit): + 
result = to_datetime(["2020-01-01" + hour, "2020-01-02" + hour]) + assert result.dtype == f"datetime64[{unit}]" + + # parsing from out of bounds date does not actually work + # def test_strings_out_of_bounds(self): + # pd.to_datetime(["-290301-01-01"], format="ISO8601") + + @pytest.mark.parametrize( + "dt", + [ + datetime(2020, 1, 1), + datetime(2020, 1, 1, 9, 0, 30), + datetime(2020, 1, 1, 9, 0, 30, 123), + datetime(2020, 1, 1, 9, 0, 30, 123456), + ], + ) + def test_datetime_datetime(self, dt): + # the range of datetime.datetime/date objects is always supported for 'us' + result = to_datetime([dt, dt]) + assert result.dtype == "datetime64[us]" + + @pytest.mark.parametrize("year", [2012, 9999]) + def test_datetime_date(self, year): + dt = date(year, 1, 1) + result = to_datetime([dt, dt]) + assert result.dtype == "datetime64[us]" + + @pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"]) + def test_np_datetime64_array(self, unit): + # numpy datetime64 already has a unit -> preserve that in this case + arr = np.array(["2020-01-01T09:00:30.123456"], dtype=f"datetime64[{unit}]") + result = to_datetime(arr) + assert result.dtype == arr.dtype + + @pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"]) + def test_np_datetime64_objects(self, unit): + dt = np.datetime64("2020-01-01T09:00:30.123456", unit) + result = to_datetime([dt, dt]) + assert result.dtype == dt.dtype + + @pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"]) + def test_timestamp_objects(self, unit): + ts = Timestamp("2020-01-01T09:00:30").as_unit(unit) + result = to_datetime([ts, ts]) + assert result.dtype == f"datetime64[{unit}]" + + # @pytest.mark.parametrize("year", [2012, 9999]) + # def test_dataframe_components(self, year): + # df = pd.DataFrame({ + # "year": [year, year], + # "month": [1, 1], + # "day": [1, 2], + # "hour": [9, 10], + # "minute": [0, 30], + # "second": [30, 45], + # }) + # result = to_datetime(df) + # assert result.dtype == "datetime64[us]"