@@ -1782,13 +1782,13 @@ def set_atom(self, block, block_items, existing_col, min_itemsize,
             return self.set_atom_timedelta64(block)
 
         dtype = block.dtype.name
-        rvalues = block.values.ravel()
-        inferred_type = lib.infer_dtype(rvalues)
+        inferred_type = lib.infer_dtype(block.values)
 
         if inferred_type == 'date':
             raise TypeError(
                 "[date] is not implemented as a table column")
         elif inferred_type == 'datetime':
+            rvalues = block.values.ravel()
             if getattr(rvalues[0], 'tzinfo', None) is not None:
 
                 # if this block has more than one timezone, raise
@@ -1917,7 +1917,7 @@ def get_atom_data(self, block, kind=None):
     def set_atom_data(self, block):
         self.kind = block.dtype.name
         self.typ = self.get_atom_data(block)
-        self.set_data(block.values.astype(self.typ.type))
+        self.set_data(block.values.astype(self.typ.type, copy=False))
 
     def set_atom_categorical(self, block, items, info=None, values=None):
         # currently only supports a 1-D categorical
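
A note on the `copy=False` changes in this hunk and the later `astype` hunks: `ndarray.astype(..., copy=False)` hands back the original array when no conversion is actually needed, instead of always allocating a new one, which is where the write-path saving comes from. A minimal sketch in plain NumPy (nothing pandas-specific):

    import numpy as np

    values = np.arange(1000, dtype='float64')

    # default astype always allocates a fresh array
    assert values.astype('float64') is not values

    # copy=False returns the same object when the dtype already matches
    assert values.astype('float64', copy=False) is values

    # a real conversion still produces a new array
    assert values.astype('int64', copy=False) is not values
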
@@ -2016,7 +2016,7 @@ def convert(self, values, nan_rep, encoding):
 
                     index = DatetimeIndex(
                         self.data.ravel(), tz='UTC').tz_convert(self.tz)
-                    self.data = np.array(
+                    self.data = np.asarray(
                         index.tolist(), dtype=object).reshape(self.data.shape)
 
                 else:
@@ -2026,14 +2026,14 @@ def convert(self, values, nan_rep, encoding):
                 self.data = np.asarray(self.data, dtype='m8[ns]')
             elif dtype == u('date'):
                 try:
-                    self.data = np.array(
+                    self.data = np.asarray(
                         [date.fromordinal(v) for v in self.data], dtype=object)
                 except ValueError:
-                    self.data = np.array(
+                    self.data = np.asarray(
                         [date.fromtimestamp(v) for v in self.data],
                         dtype=object)
             elif dtype == u('datetime'):
-                self.data = np.array(
+                self.data = np.asarray(
                     [datetime.fromtimestamp(v) for v in self.data],
                     dtype=object)
 
@@ -2048,9 +2048,9 @@ def convert(self, values, nan_rep, encoding):
             else:
 
                 try:
-                    self.data = self.data.astype(dtype)
+                    self.data = self.data.astype(dtype, copy=False)
                 except:
-                    self.data = self.data.astype('O')
+                    self.data = self.data.astype('O', copy=False)
 
         # convert nans / decode
         if _ensure_decoded(self.kind) == u('string'):
@@ -2337,9 +2337,9 @@ def read_array(self, key):
                 ret = data
 
         if dtype == u('datetime64'):
-            ret = np.array(ret, dtype='M8[ns]')
+            ret = np.asarray(ret, dtype='M8[ns]')
         elif dtype == u('timedelta64'):
-            ret = np.array(ret, dtype='m8[ns]')
+            ret = np.asarray(ret, dtype='m8[ns]')
 
         if transposed:
             return ret.T
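
The `np.array` -> `np.asarray` swaps throughout this commit follow the same logic: `np.asarray` is a pass-through when the input is already an ndarray of the requested dtype, while `np.array` copies unconditionally. A small illustration, again in plain NumPy rather than HDFStore code:

    import numpy as np

    data = np.zeros(10, dtype='M8[ns]')

    copied = np.array(data, dtype='M8[ns]')      # always a new buffer
    aliased = np.asarray(data, dtype='M8[ns]')   # reuses the existing one
    assert copied is not data
    assert aliased is data

    # non-ndarray inputs (lists, tuples) are converted either way
    assert isinstance(np.asarray([1, 2, 3]), np.ndarray)
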
@@ -3793,7 +3793,7 @@ def write_data(self, chunksize, dropna=True):
                 # figure the mask: only do if we can successfully process this
                 # column, otherwise ignore the mask
                 mask = com.isnull(a.data).all(axis=0)
-                masks.append(mask.astype('u1'))
+                masks.append(mask.astype('u1', copy=False))
 
             # consolidate masks
             mask = masks[0]
@@ -3803,8 +3803,7 @@ def write_data(self, chunksize, dropna=True):
 
         else:
 
-            mask = np.empty(nrows, dtype='u1')
-            mask.fill(False)
+            mask = None
 
         # broadcast the indexes if needed
         indexes = [a.cvalues for a in self.index_axes]
@@ -3833,12 +3832,13 @@ def write_data(self, chunksize, dropna=True):
         bvalues = []
         for i, v in enumerate(values):
             new_shape = (nrows,) + self.dtype[names[nindexes + i]].shape
-            bvalues.append(values[i].ravel().reshape(new_shape))
+            bvalues.append(values[i].reshape(new_shape))
 
         # write the chunks
         if chunksize is None:
             chunksize = 100000
 
+        rows = np.empty(min(chunksize,nrows), dtype=self.dtype)
         chunks = int(nrows / chunksize) + 1
         for i in range(chunks):
             start_i = i * chunksize
@@ -3847,11 +3847,20 @@ def write_data(self, chunksize, dropna=True):
                 break
 
             self.write_data_chunk(
+                rows,
                 indexes=[a[start_i:end_i] for a in bindexes],
-                mask=mask[start_i:end_i],
+                mask=mask[start_i:end_i] if mask is not None else None,
                 values=[v[start_i:end_i] for v in bvalues])
 
-    def write_data_chunk(self, indexes, mask, values):
+    def write_data_chunk(self, rows, indexes, mask, values):
+        """
+        Parameters
+        ----------
+        rows : an empty memory space where we are putting the chunk
+        indexes : an array of the indexes
+        mask : an array of the masks
+        values : an array of the values
+        """
 
         # 0 len
         for v in values:
@@ -3860,7 +3869,8 @@ def write_data_chunk(self, indexes, mask, values):
 
         try:
             nrows = indexes[0].shape[0]
-            rows = np.empty(nrows, dtype=self.dtype)
+            if nrows != len(rows):
+                rows = np.empty(nrows, dtype=self.dtype)
             names = self.dtype.names
             nindexes = len(indexes)
 
@@ -3873,7 +3883,10 @@ def write_data_chunk(self, indexes, mask, values):
                 rows[names[i + nindexes]] = v
 
             # mask
-            rows = rows[~mask.ravel().astype(bool)]
+            if mask is not None:
+                m = ~mask.ravel().astype(bool, copy=False)
+                if not m.all():
+                    rows = rows[m]
 
         except Exception as detail:
             raise Exception("cannot create row-data -> %s" % detail)
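
Taken together, the write_data/write_data_chunk changes allocate one record-array buffer sized for a full chunk, reuse it for every chunk (re-allocating only for the short final chunk), and skip the mask work entirely when dropna is off. A rough sketch of that pattern under assumed names (write_in_chunks, process_chunk, record_dtype and columns are made up here, not HDFStore internals):

    import numpy as np

    def process_chunk(rows):
        # stand-in for the real PyTables table.append(rows) call
        pass

    def write_in_chunks(columns, record_dtype, nrows, chunksize=100000):
        # one buffer, sized for a full chunk, reused across iterations
        rows = np.empty(min(chunksize, nrows), dtype=record_dtype)
        names = record_dtype.names

        chunks = int(nrows / chunksize) + 1
        for i in range(chunks):
            start_i = i * chunksize
            end_i = min(start_i + chunksize, nrows)
            if start_i >= end_i:
                break
            if end_i - start_i != len(rows):
                # only the last, shorter chunk forces a re-allocation
                rows = np.empty(end_i - start_i, dtype=record_dtype)
            for name, col in zip(names, columns):
                rows[name] = col[start_i:end_i]
            process_chunk(rows)
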
@@ -4240,14 +4253,14 @@ def _convert_index(index, encoding=None, format_type=None):
                         tz=getattr(index, 'tz', None),
                         index_name=index_name)
     elif inferred_type == 'datetime':
-        converted = np.array([(time.mktime(v.timetuple()) +
-                               v.microsecond / 1E6) for v in values],
-                             dtype=np.float64)
+        converted = np.asarray([(time.mktime(v.timetuple()) +
+                                 v.microsecond / 1E6) for v in values],
+                               dtype=np.float64)
         return IndexCol(converted, 'datetime', _tables().Time64Col(),
                         index_name=index_name)
     elif inferred_type == 'date':
-        converted = np.array([v.toordinal() for v in values],
-                             dtype=np.int32)
+        converted = np.asarray([v.toordinal() for v in values],
+                               dtype=np.int32)
         return IndexCol(converted, 'date', _tables().Time32Col(),
                         index_name=index_name)
     elif inferred_type == 'string':
@@ -4290,21 +4303,21 @@ def _unconvert_index(data, kind, encoding=None):
     if kind == u('datetime64'):
         index = DatetimeIndex(data)
     elif kind == u('datetime'):
-        index = np.array([datetime.fromtimestamp(v) for v in data],
-                         dtype=object)
+        index = np.asarray([datetime.fromtimestamp(v) for v in data],
+                           dtype=object)
     elif kind == u('date'):
         try:
-            index = np.array(
+            index = np.asarray(
                 [date.fromordinal(v) for v in data], dtype=object)
         except (ValueError):
-            index = np.array(
+            index = np.asarray(
                 [date.fromtimestamp(v) for v in data], dtype=object)
     elif kind in (u('integer'), u('float')):
-        index = np.array(data)
+        index = np.asarray(data)
     elif kind in (u('string')):
         index = _unconvert_string_array(data, nan_rep=None, encoding=encoding)
     elif kind == u('object'):
-        index = np.array(data[0])
+        index = np.asarray(data[0])
     else:  # pragma: no cover
         raise ValueError('unrecognized index type %s' % kind)
     return index
@@ -4315,7 +4328,7 @@ def _unconvert_index_legacy(data, kind, legacy=False, encoding=None):
     if kind == u('datetime'):
         index = lib.time64_to_datetime(data)
     elif kind in (u('integer')):
-        index = np.array(data, dtype=object)
+        index = np.asarray(data, dtype=object)
     elif kind in (u('string')):
         index = _unconvert_string_array(data, nan_rep=None, encoding=encoding)
     else:  # pragma: no cover
@@ -4334,13 +4347,13 @@ def _convert_string_array(data, encoding, itemsize=None):
     if itemsize is None:
         itemsize = lib.max_len_string_array(com._ensure_object(data.ravel()))
 
-    data = np.array(data, dtype="S%d" % itemsize)
+    data = np.asarray(data, dtype="S%d" % itemsize)
     return data
 
 def _unconvert_string_array(data, nan_rep=None, encoding=None):
     """ deserialize a string array, possibly decoding """
     shape = data.shape
-    data = np.array(data.ravel(), dtype=object)
+    data = np.asarray(data.ravel(), dtype=object)
 
     # guard against a None encoding in PY3 (because of a legacy
     # where the passed encoding is actually None)
@@ -4353,7 +4366,7 @@ def _unconvert_string_array(data, nan_rep=None, encoding=None):
                 dtype = "U{0}".format(itemsize)
             else:
                 dtype = "S{0}".format(itemsize)
-            data = data.astype(dtype).astype(object)
+            data = data.astype(dtype, copy=False).astype(object, copy=False)
         except (Exception) as e:
             f = np.vectorize(lambda x: x.decode(encoding), otypes=[np.object])
             data = f(data)
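
For context on the string helpers touched here: object-dtype string columns are stored as fixed-width byte strings and decoded back on read, and the `copy=False` chain above just avoids redundant intermediate copies during that decode. A hedged, simplified round trip outside HDFStore (itemsize handling and the PY2/PY3 branch are glossed over):

    import numpy as np

    strings = np.array(['foo', 'a', 'longer-value'], dtype=object)

    # serialize: fixed-width bytes wide enough for the longest element
    itemsize = max(len(s) for s in strings)
    stored = np.asarray(strings, dtype='S%d' % itemsize)

    # deserialize: bytes -> fixed-width unicode -> object dtype
    restored = stored.astype('U%d' % itemsize, copy=False).astype(object, copy=False)
    assert list(restored) == list(strings)
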
@@ -4376,7 +4389,7 @@ def _maybe_convert(values, val_kind, encoding):
 def _get_converter(kind, encoding):
     kind = _ensure_decoded(kind)
     if kind == 'datetime64':
-        return lambda x: np.array(x, dtype='M8[ns]')
+        return lambda x: np.asarray(x, dtype='M8[ns]')
     elif kind == 'datetime':
         return lib.convert_timestamps
     elif kind == 'string':
@@ -4421,7 +4434,7 @@ def __init__(self, table, where=None, start=None, stop=None, **kwargs):
             try:
                 inferred = lib.infer_dtype(where)
                 if inferred == 'integer' or inferred == 'boolean':
-                    where = np.array(where)
+                    where = np.asarray(where)
                     if where.dtype == np.bool_:
                         start, stop = self.start, self.stop
                         if start is None: