Skip to content

Commit 0b86bd5

Browse files
authored
Merge branch 'main' into python-db-dtypes-pandas-issue28
2 parents 9277cf5 + 3c4b9b9 commit 0b86bd5

File tree

109 files changed

+1378
-1057
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

109 files changed

+1378
-1057
lines changed

.github/workflows/code-checks.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,10 @@ jobs:
7979
run: ci/code_checks.sh docstrings
8080
if: ${{ steps.build.outcome == 'success' && always() }}
8181

82+
- name: Run check of documentation notebooks
83+
run: ci/code_checks.sh notebooks
84+
if: ${{ steps.build.outcome == 'success' && always() }}
85+
8286
- name: Use existing environment for type checking
8387
run: |
8488
echo $PATH >> $GITHUB_PATH

.github/workflows/docbuild-and-upload.yml

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -54,11 +54,6 @@ jobs:
5454
- name: Build documentation zip
5555
run: doc/make.py zip_html
5656

57-
- name: Build the interactive terminal
58-
run: |
59-
cd web/interactive_terminal
60-
jupyter lite build
61-
6257
- name: Install ssh key
6358
run: |
6459
mkdir -m 700 -p ~/.ssh

.pre-commit-config.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ repos:
2626
hooks:
2727
- id: codespell
2828
types_or: [python, rst, markdown]
29+
additional_dependencies: [tomli]
2930
- repo: https://github.com/MarcoGorelli/cython-lint
3031
rev: v0.2.1
3132
hooks:

asv_bench/benchmarks/frame_ctor.py

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -205,21 +205,4 @@ def time_frame_from_arrays_sparse(self):
205205
)
206206

207207

208-
class From3rdParty:
209-
# GH#44616
210-
211-
def setup(self):
212-
try:
213-
import torch
214-
except ImportError:
215-
raise NotImplementedError
216-
217-
row = 700000
218-
col = 64
219-
self.val_tensor = torch.randn(row, col)
220-
221-
def time_from_torch(self):
222-
DataFrame(self.val_tensor)
223-
224-
225208
from .pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/indexing.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33
lower-level methods directly on Index and subclasses, see index_object.py,
44
indexing_engine.py, and index_cached.py
55
"""
6-
import itertools
7-
import string
86
import warnings
97

108
import numpy as np
@@ -353,15 +351,13 @@ def setup(self, index):
353351
"non_monotonic": CategoricalIndex(list("abc" * N)),
354352
}
355353
self.data = indices[index]
356-
self.data_unique = CategoricalIndex(
357-
["".join(perm) for perm in itertools.permutations(string.printable, 3)]
358-
)
354+
self.data_unique = CategoricalIndex([str(i) for i in range(N * 3)])
359355

360356
self.int_scalar = 10000
361357
self.int_list = list(range(10000))
362358

363359
self.cat_scalar = "b"
364-
self.cat_list = ["a", "c"]
360+
self.cat_list = ["1", "3"]
365361

366362
def time_getitem_scalar(self, index):
367363
self.data[self.int_scalar]

asv_bench/benchmarks/multiindex_object.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -379,4 +379,26 @@ def time_isin_large(self, dtype):
379379
self.midx.isin(self.values_large)
380380

381381

class Putmask:
    """Benchmarks for ``MultiIndex.putmask``.

    Three equally sized indexes (``N = 10**5`` entries each) are built
    from the product of an integer level and a daily date level:

    * ``midx``: integers ``0..999`` crossed with 100 days from 2000-01-01.
    * ``midx_values``: integers ``1000..1999`` crossed with the same days,
      so only the first level differs from ``midx``.
    * ``midx_values_different``: integers ``1000..1999`` crossed with
      100 days from 2010-01-01, so both levels differ from ``midx``.

    ``mask`` alternates ``True``/``False`` and has ``N`` entries, one per
    index entry.
    """

    def setup(self):
        N = 10**5
        level1 = range(1_000)

        level2 = date_range(start="1/1/2000", periods=N // 1000)
        self.midx = MultiIndex.from_product([level1, level2])

        # Replacement values whose second level matches ``midx``.
        level1 = range(1_000, 2_000)
        self.midx_values = MultiIndex.from_product([level1, level2])

        # Replacement values where neither level matches ``midx``.
        level2 = date_range(start="1/1/2010", periods=N // 1000)
        self.midx_values_different = MultiIndex.from_product([level1, level2])
        self.mask = np.array([True, False] * (N // 2))

    def time_putmask(self):
        """Time putmask with replacement values sharing the date level."""
        self.midx.putmask(self.mask, self.midx_values)

    def time_putmask_all_different(self):
        """Time putmask with replacement values differing in both levels."""
        self.midx.putmask(self.mask, self.midx_values_different)
382404
from .pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/reshape.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,7 @@ def time_pivot_table_categorical_observed(self):
210210
)
211211

212212
def time_pivot_table_margins_only_column(self):
213-
self.df.pivot_table(columns=["key2", "key3"], margins=True)
213+
self.df.pivot_table(columns=["key1", "key2", "key3"], margins=True)
214214

215215

216216
class Crosstab:

asv_bench/benchmarks/series_methods.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,4 +348,37 @@ def time_rank(self, dtype):
348348
self.s.rank()
349349

350350

class Iter:
    """Benchmark plain Python iteration over a ``Series`` for several dtypes.

    The benchmark is parametrized over ``dtype``; ``setup`` builds a
    ``Series`` of ``N = 10**5`` elements of that dtype and ``time_iter``
    walks it element by element.
    """

    param_names = ["dtype"]
    params = [
        "bool",
        "boolean",
        "int64",
        "Int64",
        "float64",
        "Float64",
        "datetime64[ns]",
    ]

    def setup(self, dtype):
        """Build ``self.s`` for ``dtype``.

        Raises
        ------
        NotImplementedError
            If ``dtype`` is not one of the values handled below.
        """
        N = 10**5
        if dtype in ["bool", "boolean"]:
            # First half True, second half False.
            data = np.repeat([True, False], N // 2)
        elif dtype in ["int64", "Int64"]:
            data = np.arange(N)
        elif dtype in ["float64", "Float64"]:
            data = np.random.randn(N)
        elif dtype == "datetime64[ns]":
            data = date_range("2000-01-01", freq="s", periods=N)
        else:
            raise NotImplementedError

        self.s = Series(data, dtype=dtype)

    def time_iter(self, dtype):
        """Time a full pass over the Series; the loop body is empty."""
        for v in self.s:
            pass
351384
from .pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/sparse.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -219,12 +219,12 @@ def setup(self, fill_value):
219219
d = 1e-5
220220
arr = make_array(N, d, np.nan, np.float64)
221221
self.sp_arr = SparseArray(arr)
222-
b_arr = np.full(shape=N, fill_value=fill_value, dtype=np.bool8)
222+
b_arr = np.full(shape=N, fill_value=fill_value, dtype=np.bool_)
223223
fv_inds = np.unique(
224224
np.random.randint(low=0, high=N - 1, size=int(N * d), dtype=np.int32)
225225
)
226226
b_arr[fv_inds] = True if pd.isna(fill_value) else not fill_value
227-
self.sp_b_arr = SparseArray(b_arr, dtype=np.bool8, fill_value=fill_value)
227+
self.sp_b_arr = SparseArray(b_arr, dtype=np.bool_, fill_value=fill_value)
228228

229229
def time_mask(self, fill_value):
230230
self.sp_arr[self.sp_b_arr]

ci/code_checks.sh

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,10 @@
1212
# $ ./ci/code_checks.sh doctests # run doctests
1313
# $ ./ci/code_checks.sh docstrings # validate docstring errors
1414
# $ ./ci/code_checks.sh single-docs # check single-page docs build warning-free
15+
# $ ./ci/code_checks.sh notebooks # check execution of documentation notebooks
1516

16-
[[ -z "$1" || "$1" == "code" || "$1" == "doctests" || "$1" == "docstrings" || "$1" == "single-docs" ]] || \
17-
{ echo "Unknown command $1. Usage: $0 [code|doctests|docstrings]"; exit 9999; }
17+
[[ -z "$1" || "$1" == "code" || "$1" == "doctests" || "$1" == "docstrings" || "$1" == "single-docs" || "$1" == "notebooks" ]] || \
18+
{ echo "Unknown command $1. Usage: $0 [code|doctests|docstrings|single-docs|notebooks]"; exit 9999; }
1819

1920
BASE_DIR="$(dirname $0)/.."
2021
RET=0
@@ -84,6 +85,15 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
8485

8586
fi
8687

### DOCUMENTATION NOTEBOOKS ###
# Runs when no check was requested (empty $CHECK) or when the
# "notebooks" check was selected explicitly.
if [[ -z "$CHECK" || "$CHECK" == "notebooks" ]]; then

    MSG='Notebooks' ; echo $MSG
    # Execute every *.ipynb found under doc/source in one nbconvert call.
    jupyter nbconvert --execute $(find doc/source -name '*.ipynb') --to notebook
    # Add nbconvert's exit status to the running total kept in RET.
    RET=$(($RET + $?)) ; echo $MSG "DONE"

fi

8797
### SINGLE-PAGE DOCS ###
8898
if [[ -z "$CHECK" || "$CHECK" == "single-docs" ]]; then
8999
python doc/make.py --warnings-are-errors --single pandas.Series.value_counts

0 commit comments

Comments
 (0)