@@ -867,16 +867,6 @@ def load(path):
867
867
finally :
868
868
f .close ()
869
869
870
- def console_encode (value ):
871
- if py3compat .PY3 or not isinstance (value , unicode ):
872
- return value
873
-
874
- try :
875
- import sys
876
- return value .encode (sys .stdin .encoding or 'utf-8' , 'replace' )
877
- except (AttributeError , TypeError ):
878
- return value .encode ('ascii' , 'replace' )
879
-
880
870
class UTF8Recoder :
881
871
"""
882
872
Iterator that reads an encoded stream and reencodes the input to UTF-8
@@ -968,3 +958,94 @@ def _concat_compat(to_concat, axis=0):
968
958
return new_values .view (_NS_DTYPE )
969
959
else :
970
960
return np .concatenate (to_concat , axis = axis )
961
+
962
+ # Unicode consolidation
963
+ # ---------------------
964
+ #
965
+ # pprinting utility functions for generating Unicode text or bytes(3.x)/str(2.x)
966
+ # representations of objects.
967
+ # Try to use these as much as possible rather than rolling your own.
968
+ #
969
+ # When to use
970
+ # -----------
971
+ #
972
+ # 1) If you're writing code internal to pandas (no I/O directly involved),
973
+ # use pprint_thing().
974
+ #
975
+ # It will always return unicode text which can be handled by other
976
+ # parts of the package without breakage.
977
+ #
978
+ # 2) If you need to send something to the console, use console_encode().
979
+ #
980
+ # console_encode() should (hopefully) choose the right encoding for you
981
+ # based on the encoding set in fmt.print_config.encoding.
982
+ #
983
+ # 3) if you need to write something out to file, use pprint_thing_encoded(encoding).
984
+ #
985
+ # If no encoding is specified, it defaults to utf-8. Since encoding pure ascii with
986
+ # utf-8 is a no-op you can safely use the default utf-8 if you're working with
987
+ # straight ascii.
988
+
989
def _pprint_seq(seq, _nest_lvl=0):
    """
    Internal helper: render an iterable as unicode text.

    Objects exposing ``__setitem__`` are wrapped in square brackets,
    anything else in parentheses. Every element is formatted through
    pprint_thing() at one nesting level deeper than the current one.

    You should normally call pprint_thing() rather than this function.
    """
    if hasattr(seq, '__setitem__'):
        template = u"[%s]"
    else:
        template = u"(%s)"

    rendered = (pprint_thing(item, _nest_lvl + 1) for item in seq)
    return template % ", ".join(rendered)
996
+
997
def pprint_thing(thing, _nest_lvl=0):
    """
    Convert an arbitrary object to its unicode representation.

    This is the sanctioned conversion routine; unlike a plain
    ``unicode(obj)`` call it copes with nested sequences that contain
    unicode strings.

    Parameters
    ----------
    thing : anything to be formatted
    _nest_lvl : internal use only. pprint_thing() and _pprint_seq() call
        each other recursively; this counter tracks the current depth so
        that element-wise formatting stops once it reaches
        print_config.pprint_nest_depth.

    Returns
    -------
    result - unicode object on py2, str on py3. Always Unicode.
        None is rendered as the empty string.

    """
    # imported at call time, exactly as the original did
    from pandas.core.format import print_config

    if thing is None:
        return unicode('')

    if _is_sequence(thing) and _nest_lvl < print_config.pprint_nest_depth:
        return unicode(_pprint_seq(thing, _nest_lvl))

    # Inside the package everything handed to us is expected to be a
    # unicode object or to have a unicode-capable __str__. To ease the
    # transition, utf-8 encoded byte strings are tolerated as well; any
    # other encoding cannot be guessed, so undecodable bytes are replaced.
    try:
        text = unicode(thing)  # the preferred path, always attempted first
    except UnicodeDecodeError:
        # treat the bytes as utf-8, substituting whatever does not decode
        text = str(thing).decode('utf-8', "replace")

    return unicode(text)  # always unicode
1037
+
1038
def pprint_thing_encoded(object, encoding='utf-8', errors='replace'):
    """
    Return the encoded (byte) form of pprint_thing(object).

    The unicode representation produced by pprint_thing() is encoded with
    `encoding` (utf-8 by default) using the `errors` handler ('replace'
    by default).
    """
    return pprint_thing(object).encode(encoding, errors)
1041
+
1042
def console_encode(object):
    """
    Prepare something for being sent *to the console*.

    This is the sanctioned way to produce console output: it delegates to
    pprint_thing_encoded(), which obtains a unicode representation of the
    object via pprint_thing() and encodes it with the global encoding set
    in print_config.encoding. Use this everywhere you output to the
    console.
    """
    # The docstring must be the first statement in the body; placed after
    # this import it was a discarded string expression and __doc__ was None.
    from pandas.core.format import print_config
    return pprint_thing_encoded(object, print_config.encoding)
0 commit comments