Skip to content

API: timestamp resolution inference: default to microseconds when possible #62031

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ cpdef array_to_datetime(
iresult[i] = parse_pydatetime(val, &dts, creso=creso)

elif PyDate_Check(val):
item_reso = NPY_DATETIMEUNIT.NPY_FR_s
item_reso = NPY_DATETIMEUNIT.NPY_FR_us
state.update_creso(item_reso)
if infer_reso:
creso = state.creso
Expand Down
6 changes: 5 additions & 1 deletion pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ from pandas._libs.missing cimport checknull_with_nat_and_na
from pandas._libs.tslibs.dtypes cimport (
abbrev_to_npy_unit,
get_supported_reso,
get_supported_reso_for_dts,
npy_unit_to_attrname,
periods_per_second,
)
Expand Down Expand Up @@ -507,6 +508,9 @@ cdef _TSObject convert_datetime_to_tsobject(
if nanos:
obj.dts.ps = nanos * 1000

reso = get_supported_reso_for_dts(reso, &obj.dts)
obj.creso = reso

try:
obj.value = npy_datetimestruct_to_datetime(reso, &obj.dts)
except OverflowError as err:
Expand Down Expand Up @@ -622,7 +626,7 @@ cdef _TSObject convert_str_to_tsobject(str ts, tzinfo tz,
&out_tzoffset, False
)
if not string_to_dts_failed:
reso = get_supported_reso(out_bestunit)
reso = get_supported_reso_for_dts(out_bestunit, &dts)
check_dts_bounds(&dts, reso)
obj = _TSObject()
obj.dts = dts
Expand Down
8 changes: 7 additions & 1 deletion pandas/_libs/tslibs/dtypes.pxd
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from numpy cimport int64_t

from pandas._libs.tslibs.np_datetime cimport NPY_DATETIMEUNIT
from pandas._libs.tslibs.np_datetime cimport (
NPY_DATETIMEUNIT,
npy_datetimestruct,
)


cdef str npy_unit_to_abbrev(NPY_DATETIMEUNIT unit)
Expand All @@ -9,6 +12,9 @@ cdef NPY_DATETIMEUNIT freq_group_code_to_npy_unit(int freq) noexcept nogil
cpdef int64_t periods_per_day(NPY_DATETIMEUNIT reso=*) except? -1
cpdef int64_t periods_per_second(NPY_DATETIMEUNIT reso) except? -1
cdef NPY_DATETIMEUNIT get_supported_reso(NPY_DATETIMEUNIT reso)
cdef NPY_DATETIMEUNIT get_supported_reso_for_dts(
NPY_DATETIMEUNIT reso, npy_datetimestruct* dts
)
cdef bint is_supported_unit(NPY_DATETIMEUNIT reso)

cdef dict c_OFFSET_TO_PERIOD_FREQSTR
Expand Down
40 changes: 40 additions & 0 deletions pandas/_libs/tslibs/dtypes.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,21 @@
# originals
from enum import Enum

import numpy as np

from cpython.object cimport (
Py_GE,
Py_LE,
)

from pandas._libs.tslibs.ccalendar cimport c_MONTH_NUMBERS
from pandas._libs.tslibs.np_datetime cimport (
NPY_DATETIMEUNIT,
cmp_dtstructs,
get_conversion_factor,
import_pandas_datetime,
npy_datetimestruct,
pandas_datetime_to_datetimestruct,
)

import_pandas_datetime()
Expand Down Expand Up @@ -504,6 +514,36 @@ cdef NPY_DATETIMEUNIT get_supported_reso(NPY_DATETIMEUNIT reso):
return reso


# Module-level bounds, filled in once at import time: the datetime-struct
# forms of (int64 min + 1) and (int64 max) when those integers are passed to
# pandas_datetime_to_datetimestruct at microsecond resolution.
# get_supported_reso_for_dts compares incoming values against these two
# structs (inclusive on both ends).
# NOTE(review): the lower bound skips int64 min itself, presumably because
# that value is reserved for NaT -- confirm.
cdef npy_datetimestruct dts_us_min, dts_us_max
pandas_datetime_to_datetimestruct(
np.iinfo(np.int64).min + 1, NPY_DATETIMEUNIT.NPY_FR_us, &dts_us_min
)
pandas_datetime_to_datetimestruct(
np.iinfo(np.int64).max, NPY_DATETIMEUNIT.NPY_FR_us, &dts_us_max
)


cdef NPY_DATETIMEUNIT get_supported_reso_for_dts(
    NPY_DATETIMEUNIT reso, npy_datetimestruct* dts
):
    """
    Choose the resolution to store ``dts`` with, given its inferred unit.

    Similar to ``get_supported_reso``, but takes the actual datetime value
    into account, defaulting to microseconds whenever the value fits.

    Parameters
    ----------
    reso : NPY_DATETIMEUNIT
        Resolution inferred for the value.
    dts : npy_datetimestruct*
        The value itself; compared against the module-level ``dts_us_min``
        and ``dts_us_max`` bounds (both inclusive).

    Returns
    -------
    NPY_DATETIMEUNIT
        * ``NPY_FR_ns`` when ``reso`` is ``NPY_FR_GENERIC`` or compares
          above ``NPY_FR_ns``.
        * ``reso`` unchanged when it is ``NPY_FR_us`` or ``NPY_FR_ns``.
        * When ``reso`` compares below ``NPY_FR_us``: ``NPY_FR_us`` if
          ``dts`` lies within the microsecond bounds; otherwise
          ``NPY_FR_ms`` if ``reso`` is ``NPY_FR_ms``, else ``NPY_FR_s``.
    """
    cdef bint fits_in_us

    if reso == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
        return NPY_DATETIMEUNIT.NPY_FR_ns

    if reso < NPY_DATETIMEUNIT.NPY_FR_us:
        fits_in_us = (
            cmp_dtstructs(dts, &dts_us_min, Py_GE)
            and cmp_dtstructs(dts, &dts_us_max, Py_LE)
        )
        if fits_in_us:
            return NPY_DATETIMEUNIT.NPY_FR_us
        if reso == NPY_DATETIMEUNIT.NPY_FR_ms:
            # Out of range for 'us': keep the inferred millisecond unit
            # rather than dropping to seconds, which would discard the
            # sub-second part (get_supported_reso also leaves 'ms' as-is).
            return NPY_DATETIMEUNIT.NPY_FR_ms
        return NPY_DATETIMEUNIT.NPY_FR_s
    elif reso > NPY_DATETIMEUNIT.NPY_FR_ns:
        return NPY_DATETIMEUNIT.NPY_FR_ns
    return reso


cdef bint is_supported_unit(NPY_DATETIMEUNIT reso):
return (
reso == NPY_DATETIMEUNIT.NPY_FR_ns
Expand Down
13 changes: 5 additions & 8 deletions pandas/_libs/tslibs/strptime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ from pandas._libs.tslibs.conversion cimport (
)
from pandas._libs.tslibs.dtypes cimport (
get_supported_reso,
get_supported_reso_for_dts,
npy_unit_to_abbrev,
npy_unit_to_attrname,
)
Expand Down Expand Up @@ -421,7 +422,7 @@ def array_strptime(
continue
elif PyDate_Check(val):
state.found_other = True
item_reso = NPY_DATETIMEUNIT.NPY_FR_s
item_reso = NPY_DATETIMEUNIT.NPY_FR_us
state.update_creso(item_reso)
if infer_reso:
creso = state.creso
Expand Down Expand Up @@ -460,7 +461,7 @@ def array_strptime(
if string_to_dts_succeeded:
# No error reported by string_to_dts, pick back up
# where we left off
item_reso = get_supported_reso(out_bestunit)
item_reso = get_supported_reso_for_dts(out_bestunit, &dts)
state.update_creso(item_reso)
if infer_reso:
creso = state.creso
Expand Down Expand Up @@ -622,7 +623,7 @@ cdef tzinfo _parse_with_format(
f"time data \"{val}\" doesn't match format \"{fmt}\""
)

item_reso[0] = NPY_DATETIMEUNIT.NPY_FR_s
item_reso[0] = NPY_DATETIMEUNIT.NPY_FR_us

iso_year = -1
year = 1900
Expand Down Expand Up @@ -710,11 +711,7 @@ cdef tzinfo _parse_with_format(
elif parse_code == 10:
# e.g. val='10:10:10.100'; fmt='%H:%M:%S.%f'
s = found_dict["f"]
if len(s) <= 3:
item_reso[0] = NPY_DATETIMEUNIT.NPY_FR_ms
elif len(s) <= 6:
item_reso[0] = NPY_DATETIMEUNIT.NPY_FR_us
else:
if len(s) > 6:
item_reso[0] = NPY_FR_ns
# Pad to always return nanoseconds
s += "0" * (9 - len(s))
Expand Down
Loading
Loading