@@ -44,6 +44,78 @@ def test_strings(self):
4444 expected = Series (np .array ([1 , 0 , np .nan , 0 , 1 , 2 , np .nan ]))
4545 tm .assert_series_equal (result ,expected )
4646
class TestFactorize(tm.TestCase):
    """Checks the ``(labels, uniques)`` pair returned by ``algos.factorize``.

    Covers plain strings, integers, floats, a mixed object Series that
    contains NaN, and date-like values (Timestamps and Periods), each with
    and without ``sort=True``.
    """
    _multiprocess_can_split_ = True

    def _assert_factorize(self, values, exp_labels, exp_uniques, **kwargs):
        """Factorize ``values`` and compare both outputs with expectations.

        Parameters
        ----------
        values : list or Series
            Input handed to ``algos.factorize``.
        exp_labels : np.ndarray
            Expected integer codes, one per input element.
        exp_uniques : np.ndarray
            Expected distinct values.
        **kwargs
            Forwarded unchanged to ``algos.factorize`` (e.g. ``sort=True``).
        """
        labels, uniques = algos.factorize(values, **kwargs)
        # np.array_equal compares shape and element values only; the dtypes
        # on the expected arrays document intent but are not enforced here.
        self.assertTrue(np.array_equal(labels, exp_labels),
                        "labels %r != expected %r" % (labels, exp_labels))
        self.assertTrue(np.array_equal(uniques, exp_uniques),
                        "uniques %r != expected %r" % (uniques, exp_uniques))

    def test_basic(self):
        """Homogeneous string, integer and float inputs."""

        # strings that first appear in already-sorted order: sorting is a no-op
        letters = ['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']
        letter_labels = np.array([0, 1, 1, 0, 0, 2, 2, 2], dtype=np.int64)
        letter_uniques = np.array(['a', 'b', 'c'], dtype=object)
        self._assert_factorize(letters, letter_labels, letter_uniques)
        self._assert_factorize(letters, letter_labels, letter_uniques,
                               sort=True)

        # integers in descending order: unsorted keeps order of appearance
        self._assert_factorize(list(reversed(range(5))),
                               np.array([0, 1, 2, 3, 4], dtype=np.int64),
                               np.array([4, 3, 2, 1, 0], dtype=np.int64))

        # sort=True orders the uniques and remaps the labels accordingly
        self._assert_factorize(list(reversed(range(5))),
                               np.array([4, 3, 2, 1, 0], dtype=np.int64),
                               np.array([0, 1, 2, 3, 4], dtype=np.int64),
                               sort=True)

        # floats in descending order: labels are integer codes and the
        # uniques are the float inputs in order of appearance
        self._assert_factorize(list(reversed(np.arange(5.))),
                               np.array([0, 1, 2, 3, 4], dtype=np.int64),
                               np.array([4., 3., 2., 1., 0.],
                                        dtype=np.float64))

        self._assert_factorize(list(reversed(np.arange(5.))),
                               np.array([4, 3, 2, 1, 0], dtype=np.int64),
                               np.array([0., 1., 2., 3., 4.],
                                        dtype=np.float64),
                               sort=True)

    def test_mixed(self):
        """Object Series mixing strings, floats, inf and NaN."""

        # doc example reshaping.rst
        x = Series(['A', 'A', np.nan, 'B', 3.14, np.inf])

        # the NaN entry is expected to be labelled -1 and is absent from
        # the expected uniques
        self._assert_factorize(
            x,
            np.array([0, 0, -1, 1, 2, 3], dtype=np.int64),
            np.array(['A', 'B', 3.14, np.inf], dtype=object))

        # with sort=True the numeric values are expected before the strings
        self._assert_factorize(
            x,
            np.array([2, 2, -1, 3, 0, 1], dtype=np.int64),
            np.array([3.14, np.inf, 'A', 'B'], dtype=object),
            sort=True)

    def test_datelike(self):
        """Timestamp (M8[ns]) and Period values."""

        # M8
        v1 = pd.Timestamp('20130101 09:00:00.00004')
        v2 = pd.Timestamp('20130101')
        x = Series([v1, v1, v1, v2, v2, v1])

        self._assert_factorize(
            x,
            np.array([0, 0, 0, 1, 1, 0], dtype=np.int64),
            np.array([v1.value, v2.value], dtype='M8[ns]'))

        # v2 is the earlier timestamp, so it comes first once sorted
        self._assert_factorize(
            x,
            np.array([1, 1, 1, 0, 0, 1], dtype=np.int64),
            np.array([v2.value, v1.value], dtype='M8[ns]'),
            sort=True)

        # period
        v1 = pd.Period('201302', freq='M')
        v2 = pd.Period('201303', freq='M')
        x = Series([v1, v1, v1, v2, v2, v1])

        # periods are not 'sorted' as they are converted back into an index
        period_labels = np.array([0, 0, 0, 1, 1, 0], dtype=np.int64)
        period_uniques = np.array([v1, v2], dtype=object)
        self._assert_factorize(x, period_labels, period_uniques)
        self._assert_factorize(x, period_labels, period_uniques, sort=True)
118+
47119class TestUnique (tm .TestCase ):
48120 _multiprocess_can_split_ = True
49121
0 commit comments