Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .github/workflows/api-docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,15 @@ jobs:
run: |
# Install package itself to install the samples datasets
python -m pip install --upgrade pip
# Install tomli for uniform TOML support across Python versions (tomllib is in the standard library since Python 3.11)
python -m pip install tomli
python scripts/extract_dependencies_from_pyproject_toml.py -f "pyproject.toml" > requires.txt
# First, install all dependencies except khiops-core and khiops-drivers-*
python -m pip install --user `perl -pe "s/khiops-\S+//g" requires.txt`
# khiops-core and khiops-drivers-* must always be installed from TestPyPI in order to avoid distorting usage statistics
python -m pip install --user --index-url https://test.pypi.org/simple `grep -oE "khiops-\S+" requires.txt | paste -sd ' ' -`
rm -f requires.txt
# Lastly, install khiops-python
python -m pip install --user .
kh-download-datasets --force-overwrite --version ${{ inputs.khiops-samples-revision || env.DEFAULT_KHIOPS_SAMPLES_REVISION }}
kh-status
Expand Down
42 changes: 18 additions & 24 deletions .github/workflows/dev-docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,6 @@ env:
DEFAULT_IMAGE_INCREMENT: 0
DEFAULT_SERVER_REVISION: main
DEFAULT_PYTHON_VERSIONS: 3.10 3.11 3.12 3.13 3.14
DEFAULT_KHIOPS_GCS_DRIVER_REVISION: 0.0.16
DEFAULT_KHIOPS_S3_DRIVER_REVISION: 0.0.15
DEFAULT_KHIOPS_AZURE_DRIVER_REVISION: 0.0.6 # XXX : to modify soon
on:
pull_request:
paths: [packaging/docker/khiopspydev/Dockerfile.*, .github/workflows/dev-docker.yml]
Expand All @@ -16,7 +13,7 @@ on:
khiops-revision:
type: string
default: 11.0.0
description: Khiops Revision
description: Khiops Revision (for tests against KhiopsDockerRunner)
image-increment:
type: number
default: 0
Expand All @@ -37,18 +34,6 @@ on:
type: string
default: main
description: Khiops Server Revision
khiops-gcs-driver-revision:
type: string
default: 0.0.16
description: Driver version for Google Cloud Storage remote files
khiops-s3-driver-revision:
type: string
default: 0.0.15
description: Driver version for AWS-S3 remote files
khiops-azure-driver-revision:
type: string
default: 0.0.6 # XXX : to modify soon
description: Driver version for Azure remote files and blobs
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
Expand All @@ -63,16 +48,28 @@ jobs:
packages: write # to write in the Github package registry
steps:
- name: Set input parameters as env or output
shell: bash
run: |
set -x
echo "KHIOPS_REVISION=${{ inputs.khiops-revision || env.DEFAULT_KHIOPS_REVISION }}" >> "$GITHUB_ENV"
echo "IMAGE_INCREMENT=${{ inputs.image-increment || env.DEFAULT_IMAGE_INCREMENT }}" >> "$GITHUB_ENV"
echo "KHIOPSDEV_OS_CODENAME=$(echo '${{ matrix.khiopsdev-os }}' | tr -d '0-9.')" >> "$GITHUB_ENV"
KHIOPSDEV_OS_CODENAME=$(echo '${{ matrix.khiopsdev-os }}' | tr -d '0-9.')
echo "KHIOPSDEV_OS_CODENAME=${KHIOPSDEV_OS_CODENAME}" >> "$GITHUB_ENV"
case ${KHIOPSDEV_OS_CODENAME} in
ubuntu | debian)
# same Dockerfile for the whole family
echo "DOCKER_FILE_NAME=Dockerfile.ubuntu-debian" >> "$GITHUB_ENV"
;;
rocky)
echo "DOCKER_FILE_NAME=Dockerfile.rocky" >> "$GITHUB_ENV"
;;
*)
echo "::error::Status error: '${KHIOPSDEV_OS_CODENAME}' is an unexpected OS codename."
exit 1
;;
esac
echo "SERVER_REVISION=${{ inputs.server-revision || env.DEFAULT_SERVER_REVISION }}" >> "$GITHUB_ENV"
echo "IMAGE_URL=ghcr.io/khiopsml/khiops-python/khiopspydev-${{ matrix.khiopsdev-os }}" >> "$GITHUB_ENV"
echo "KHIOPS_GCS_DRIVER_REVISION=${{ inputs.khiops-gcs-driver-revision || env.DEFAULT_KHIOPS_GCS_DRIVER_REVISION }}" >> "$GITHUB_ENV"
echo "KHIOPS_S3_DRIVER_REVISION=${{ inputs.khiops-s3-driver-revision || env.DEFAULT_KHIOPS_S3_DRIVER_REVISION }}" >> "$GITHUB_ENV"
echo "KHIOPS_AZURE_DRIVER_REVISION=${{ inputs.khiops-azure-driver-revision || env.DEFAULT_KHIOPS_AZURE_DRIVER_REVISION }}" >> "$GITHUB_ENV"
- name: Checkout khiops-python sources
uses: actions/checkout@v4
- name: Set up Docker Buildx
Expand Down Expand Up @@ -103,15 +100,12 @@ jobs:
# added using inputs because /etc/hosts is read-only for alternate builders (buildx via moby buildkit)
add-hosts: s3-bucket.localhost:127.0.0.1
context: ./packaging/docker/khiopspydev/
file: ./packaging/docker/khiopspydev/Dockerfile.${{ env.KHIOPSDEV_OS_CODENAME }}
file: ./packaging/docker/khiopspydev/${{ env.DOCKER_FILE_NAME }}
build-args: |
"KHIOPS_REVISION=${{ env.KHIOPS_REVISION }}"
"KHIOPSDEV_OS=${{ matrix.khiopsdev-os }}"
"SERVER_REVISION=${{ env.SERVER_REVISION }}"
"PYTHON_VERSIONS=${{ inputs.python-versions || env.DEFAULT_PYTHON_VERSIONS }}"
"KHIOPS_GCS_DRIVER_REVISION=${{ env.KHIOPS_GCS_DRIVER_REVISION }}"
"KHIOPS_S3_DRIVER_REVISION=${{ env.KHIOPS_S3_DRIVER_REVISION }}"
"KHIOPS_AZURE_DRIVER_REVISION=${{ env.KHIOPS_AZURE_DRIVER_REVISION }}"
tags: ${{ env.DOCKER_IMAGE_TAGS }}
# Push only on manual request
push: ${{ inputs.push || false }}
Expand Down
25 changes: 19 additions & 6 deletions .github/workflows/pip.yml
Original file line number Diff line number Diff line change
Expand Up @@ -77,23 +77,23 @@ jobs:
run: |
SAMPLES_REVISION=${{ inputs.samples-revision || env.DEFAULT_SAMPLES_REVISION }}
echo "SAMPLES_REVISION=$SAMPLES_REVISION" >> "$GITHUB_ENV"
- name: Checkout Khiops samples
- name: Checkout sources # Checking out the sources is mandatory to be able to extract the dependencies list
uses: actions/checkout@v4
with:
repository: khiopsml/khiops-samples
ref: ${{ env.SAMPLES_REVISION }}
token: ${{ secrets.GITHUB_TOKEN }}
path: khiops-samples
# Get Git tags so that versioneer can function correctly
# See issue https://github.com/actions/checkout/issues/701
fetch-depth: 0
- name: Download package artifact
uses: actions/download-artifact@v4
with:
name: pip-package
- name: Install package
- name: Install the package and download the samples
shell: bash
run: |
# Allow Pip to write to its cache
mkdir -p /github/home/.cache/pip
chown -R $(whoami) /github/home/.cache/pip

# Install the Khiops Python library

# A virtual env is mandatory under debian
Expand All @@ -102,10 +102,23 @@ jobs:
source khiops-debian-venv/bin/activate
fi
pip install --upgrade pip
# Install tomli for uniform TOML support across Python versions (tomllib is in the standard library since Python 3.11)
pip install tomli
python scripts/extract_dependencies_from_pyproject_toml.py -f "pyproject.toml" > requires.txt
# First, install all dependencies except khiops-core and khiops-drivers-*
pip install `perl -pe "s/khiops-\S+//g" requires.txt`
# khiops-core and khiops-drivers-* must always be installed from TestPyPI in order to avoid distorting usage statistics
pip install --index-url https://test.pypi.org/simple `grep -oE "khiops-\S+" requires.txt | paste -sd ' ' -`
rm -f requires.txt
# Lastly, install khiops-python
pip install $(ls khiops*.tar.gz)
if [[ "${{ matrix.container }}" == "debian13" ]]; then
deactivate
fi
# Download the samples to `khiops-samples` instead of the default location used by `kh-download-datasets`
wget -O khiops_samples.zip "https://github.com/KhiopsML/khiops-samples/releases/download/${{ env.SAMPLES_REVISION }}/khiops-samples-${{ env.SAMPLES_REVISION }}.zip"
mkdir -p ./khiops-samples && unzip khiops_samples.zip -d ./khiops-samples
rm -f khiops_samples.zip
- name: Run tests
env:
KHIOPS_SAMPLES_DIR: ${{ github.workspace }}/khiops-samples
Expand Down
13 changes: 8 additions & 5 deletions .github/workflows/quick-checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,14 @@ jobs:
- name: Install khiops-python dev dependencies
run: |
# Extract and install package dependency requirements from metadata
pip install pip-tools
python -m piptools compile -o requirements.txt

# Install dev dependencies
pip install -r requirements.txt
# Install tomli for uniform TOML support across Python versions (tomllib is in the standard library since Python 3.11)
pip install tomli
python scripts/extract_dependencies_from_pyproject_toml.py -f "pyproject.toml" > requires.txt
# First, install all dependencies except khiops-core and khiops-drivers-*
pip install `perl -pe "s/khiops-\S+//g" requires.txt`
# khiops-core and khiops-drivers-* must always be installed from TestPyPI in order to avoid distorting usage statistics
pip install --index-url https://test.pypi.org/simple `grep -oE "khiops-\S+" requires.txt | paste -sd ' ' -`
rm -f requires.txt

# Install black for the samples-generation script
pip install black
Expand Down
33 changes: 15 additions & 18 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,12 @@ jobs:
# Install tomli for uniform TOML support across Python versions (tomllib is in the standard library since Python 3.11)
$CONDA install -y -n "$CONDA_ENV" tomli
$CONDA run --no-capture-output -n "$CONDA_ENV" python scripts/extract_dependencies_from_pyproject_toml.py -f "pyproject.toml" > requires.txt
$CONDA install -y -n "$CONDA_ENV" `cat requires.txt`
# Conda is currently used to simulate multiple Python environments: avoid installing Conda packages and install Pip packages instead
$CONDA install -y -n "$CONDA_ENV" pip # Required otherwise the system pip would be run which will install outside the Conda env
# First, install all dependencies except khiops-core and khiops-drivers-*
$CONDA run -n "$CONDA_ENV" pip install `perl -pe "s/khiops-\S+//g" requires.txt`
# khiops-core and khiops-drivers-* must always be installed from TestPyPI in order to avoid distorting usage statistics
$CONDA run -n "$CONDA_ENV" pip install --index-url https://test.pypi.org/simple `grep -oE "khiops-\S+" requires.txt | paste -sd ' ' -`
rm -f requires.txt
- name: Configure Expensive Tests Setting
# Skip expensive tests by default, unless on the `main-v10` or `main` branches
Expand Down Expand Up @@ -267,17 +272,20 @@ jobs:
python scripts/extract_dependencies_from_pyproject_toml.py -f "pyproject.toml" -s "\n" > requires.txt

# Install the Python requirements outside a python venv
# khiops-core and khiops-drivers-* must always be installed from TestPyPI in order to avoid distorting usage statistics
# Use of both 'index-url' and '--extra-index-url' options allows indexes prioritization (PEP 766)
Get-Content .\requires.txt `
| ForEach-Object {python -m pip install $_.toString()}
| ForEach-Object {python -m pip install --index-url https://test.pypi.org/simple --extra-index-url https://pypi.org/simple $_.toString()}
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as above regarding --extra-index-url.

Copy link
Copy Markdown
Collaborator Author

@tramora tramora May 29, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for Windows (PowerShell), I do not know yet the tools that would help me split into 2 lines


# Create and activate a python venv
python -m venv khiops-windows-venv
khiops-windows-venv\Scripts\Activate.ps1

# Install the Python requirements inside a venv
# The venv python executable is used here
# Same indexes prioritization as above
Get-Content .\requires.txt `
| ForEach-Object {khiops-windows-venv\Scripts\python -m pip install $_.toString()}
| ForEach-Object {khiops-windows-venv\Scripts\python -m pip install --index-url https://test.pypi.org/simple --extra-index-url https://pypi.org/simple $_.toString()}
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cf. the --extra-index-url comments above.

Copy link
Copy Markdown
Collaborator Author

@tramora tramora May 29, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for Windows (PowerShell), I do not know yet the tools that would help me split into 2 lines


# Deactivate the python venv
deactivate
Expand Down Expand Up @@ -392,7 +400,10 @@ jobs:
# Install tomli for Python < 3.11
pip install tomli
python scripts/extract_dependencies_from_pyproject_toml.py -f "pyproject.toml" > requires.txt
pip install `cat requires.txt`
# First, install all dependencies except khiops-core and khiops-drivers-*
pip install `perl -pe "s/khiops-\S+//g" requires.txt`
# khiops-core and khiops-drivers-* must always be installed from TestPyPI in order to avoid distorting usage statistics
pip install --index-url https://test.pypi.org/simple `grep -oE "khiops-\S+" requires.txt | paste -sd ' ' -`
rm -f requires.txt
if [[ "${{ matrix.container }}" == "debian13" ]]; then
deactivate
Expand Down Expand Up @@ -447,20 +458,6 @@ jobs:
echo "::error::Status error: improper setup, as expected: khiops-python has been cloned, not installed from a package"
fi

# Run the library against an incompatible Khiops (with a different major version)
# This instance of Khiops is isolated in a dedicated conda environment
CONDA="/root/miniforge3/bin/conda"
# Check an error is raised because of the major version mismatch
# The khiops-python library from the cloned sources is used here
PATTERN=$($CONDA run -n py3_khiops10_conda python -c "import khiops.core as kh; print(kh.get_runner().khiops_version)" 2> >(grep -Ei 'major version.*?does not match'))
if [ -z "$PATTERN" ]; then
echo "::error::Status error: khiops-python should fail because of the major version mismatch"
if [[ "${{ matrix.container }}" == "debian13" ]]; then
deactivate
fi
exit 1;
fi

# Run the remaining integration tests
python -m unittest -v tests.test_khiops_integrations

Expand Down
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,12 @@
- (General) Support for Azure storage

### Changed
- (General) Full-Pip installation support: `khiops` now depends on the `khiops-core` and optionally on the remote storage driver packages
- (`core`) Rename `variable_part_dimensions` to `inner_variable_dimensions` in Coclustering results.

### Removed
- (General) Support of the installation type combining "OS Native Khiops Core" and "Pip Khiops Python library"

## 11.0.0.3 - 2026-03-06

### Added
Expand Down
17 changes: 17 additions & 0 deletions khiops/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,27 @@
(extension ".khcj")
- sklearn: Scikit-Learn estimator classes to learn and use Khiops models
"""
from importlib.metadata import PackageNotFoundError, distribution

from khiops.core import KhiopsEnvironmentError
from khiops.core.internals.version import KhiopsVersion

__version__ = "11.0.0.3"

# The Khiops Python library, when packaged as a PyPI package, depends on the
# binary-only `khiops-core` PyPI package.
# An error must be raised if this dependency is missing.
try:
    # FIXME: the check below is disabled because it would always raise an error when:
    #   - the installation method is 'conda' or 'conda-based', and
    #   - `khiops-core` was installed as a Conda package, not as a PyPI package
    # distribution("khiops-core")
    pass
except PackageNotFoundError as exc:
    # Single f-string: the message is unchanged, the former split literal had a
    # first part without any placeholder
    raise KhiopsEnvironmentError(
        f"The Khiops binaries are not installed properly: {exc}"
    ) from exc


def get_compatible_khiops_version():
"""Returns the latest Khiops version compatible with this package's version"""
Expand Down
62 changes: 59 additions & 3 deletions khiops/core/internals/filesystems.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"""Classes to interact with local and remote filesystems"""
import json
import os
import platform
import shutil
import warnings
from abc import ABC, abstractmethod
Expand Down Expand Up @@ -116,8 +117,8 @@ def _child_uri_info(uri_info, child_name):
uri_info : `urllib.parse.ParseResult`
URI info structure (output of `urllib.parse.urlparse`)

child_name : str
Name of the new childe node
child_name : `str`
Name of the new child node

Returns
-------
Expand All @@ -127,6 +128,45 @@ def _child_uri_info(uri_info, child_name):
return uri_info._replace(path=_child_path(uri_info.path, child_name))


def _check_khiops_driver_library(remote_storage_type, khiops_drivers_path=None):
"""Ensures the dynamic library for the Khiops driver exists

Parameters
----------
remote_storage_type : `str`
Type of the supported storage type

khiops_drivers_path : `str`
Absolute path to the folder containing all the drivers libraries.
If a relative path (without the leading '/') is passed,
an absolute path is inferred using the current folder which will probably
cause an error.

this method raises a `RuntimeError` if the dynamic library is not found
"""
assert type(remote_storage_type) == str
assert remote_storage_type in ("s3", "gcs", "azure")
assert type(khiops_drivers_path) == str

match platform.system():
case "Windows":
extension = ".dll"
case "Darwin": # MacOS
extension = ".dylib"
case _: # any Linux platform
extension = ".so"
absolute_path = os.path.join(
os.path.abspath(khiops_drivers_path),
f"libkhiopsdriver_file_{remote_storage_type}{extension}",
)
if not os.path.exists(absolute_path):
raise RuntimeError(
f"Khiops driver for '{remote_storage_type}' "
f"is missing (expected in {absolute_path}). "
"Make sure you installed it."
)


######################
## Helper Functions ##
######################
Expand Down Expand Up @@ -538,6 +578,11 @@ class GoogleCloudStorageResource(FilesystemResource):
"""

def __init__(self, uri):

khiops_drivers_path = os.environ.get("KHIOPS_DRIVERS_PATH")
if khiops_drivers_path:
_check_khiops_driver_library("gcs", khiops_drivers_path=khiops_drivers_path)

# Stop initialization if google.cloud module is not available
if gcs_import_error is not None:
warnings.warn(
Expand Down Expand Up @@ -635,11 +680,16 @@ class AmazonS3Resource(FilesystemResource):
"""

def __init__(self, uri):

khiops_drivers_path = os.environ.get("KHIOPS_DRIVERS_PATH")
if khiops_drivers_path:
_check_khiops_driver_library("s3", khiops_drivers_path=khiops_drivers_path)

# Stop initialization if boto3 could not be imported
if boto3_import_error is not None:
warnings.warn(
"Could not import boto3 python library, "
"make sure you it installed to access S3 files."
"make sure you installed it to access S3 files."
)
raise boto3_import_error

Expand Down Expand Up @@ -778,6 +828,12 @@ def __init__(self, uri):
Azure Storage Resource initializer common to Files and Blobs
"""

khiops_drivers_path = os.environ.get("KHIOPS_DRIVERS_PATH")
if khiops_drivers_path:
_check_khiops_driver_library(
"azure", khiops_drivers_path=khiops_drivers_path
)

# Stop initialization if Azure modules are not available
if azure_import_error is not None:
warnings.warn(
Expand Down
Loading
Loading