From ceaf21f41474788c82dc79c75608d80b4965ae31 Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Wed, 4 Apr 2018 17:02:15 +1000 Subject: [PATCH 01/16] PyArray conversion speedups and PyArrayFromBuffer * PyArray_Info parameterised T,N and immutable * PyArray_Info size and stride changed to Tuples (from Vectors) * PyArray_Info data changed from Ptr{Cvoid} to Ptr{T} * `PyArrayInfoFromBuffer(o::PyObject)` faster way to get PyArray_Info from numpy than numpy's __array_interface__ * `PyArrayFromBuffer(o::PyObject)` 4x faster PyArray conversion * ArrayFromBuffer no copy conversion to Julia Array * moved PyBuffer tests to separate file --- benchmarks/arrayperf.jl | 57 ++++++++++++++++++++++++ src/PyCall.jl | 3 +- src/numpy.jl | 99 ++++++++++++++++++++++++----------------- src/pybuffer.jl | 60 ++++++++++++++++++++++++- test/runtests.jl | 2 + test/testpybuffer.jl | 25 +++++++++++ 6 files changed, 203 insertions(+), 43 deletions(-) create mode 100644 benchmarks/arrayperf.jl create mode 100644 test/testpybuffer.jl diff --git a/benchmarks/arrayperf.jl b/benchmarks/arrayperf.jl new file mode 100644 index 00000000..e805b76e --- /dev/null +++ b/benchmarks/arrayperf.jl @@ -0,0 +1,57 @@ +using PyCall, BenchmarkTools, DataStructures +using PyCall: PyArrayInfoFromBuffer + +results = OrderedDict{String,Any}() + +let + np = pyimport("numpy") + nprand = np["random"]["rand"] + # nparray_pyo(x) = pycall(np["array"], PyObject, x) + # pytestarray(sz::Int...) = pycall(np["reshape"], PyObject, nparray_pyo(1:prod(sz)), sz) + + # no convert baseline + nprand_pyo(sz...) = pycall(nprand, PyObject, sz...) + + for arr_size in [(2,2), (100,100)] + pyo_arr = nprand_pyo(arr_size...) + results["nprand_pyo$arr_size"] = @benchmark $nprand_pyo($arr_size...) + println("nprand_pyo $arr_size:\n"); display(results["nprand_pyo$arr_size"]) + println("--------------------------------------------------") + + results["convert_pyarr$arr_size"] = @benchmark $convert(PyArray, $pyo_arr) + println("convert_pyarr $arr_size:\n"); display(results["convert_pyarr$arr_size"]) + println("--------------------------------------------------") + + results["PyArrayInfoFromBuffer$arr_size"] = @benchmark $PyArrayInfoFromBuffer($pyo_arr) + println("PyArrayInfoFromBuffer $arr_size:\n"); display(results["PyArrayInfoFromBuffer$arr_size"]) + println("--------------------------------------------------") + + results["convert_pyarrbuf$arr_size"] = @benchmark $PyArrayFromBuffer($pyo_arr) + println("convert_pyarrbuf $arr_size:\n"); display(results["convert_pyarrbuf$arr_size"]) + println("--------------------------------------------------") + + results["convert_arr$arr_size"] = @benchmark convert(Array, $pyo_arr) + println("convert_arr $arr_size:\n"); display(results["convert_arr$arr_size"]) + println("--------------------------------------------------") + + results["convert_arrbuf$arr_size"] = @benchmark $ArrayFromBuffer($pyo_arr) + println("convert_arrbuf $arr_size:\n"); display(results["convert_arrbuf$arr_size"]) + println("--------------------------------------------------") + + pyarr = convert(PyArray, pyo_arr) + results["setdata!$arr_size"] = @benchmark $setdata!($pyarr, $pyo_arr) + println("setdata!:\n"); display(results["setdata!$arr_size"]) + println("--------------------------------------------------") + + pyarr = convert(PyArray, pyo_arr) + pybuf=PyBuffer() + results["setdata! bufprealloc$arr_size"] = + @benchmark $setdata!($pyarr, $pyo_arr, $pybuf) + println("setdata! bufprealloc:\n"); display(results["setdata! bufprealloc$arr_size"]) + println("--------------------------------------------------") + end +end +println() +println("Mean times") +println("----------") +foreach((r)->println(rpad(r[1],27), ": ", mean(r[2])), results) diff --git a/src/PyCall.jl b/src/PyCall.jl index 37542e93..9cb0d325 100644 --- a/src/PyCall.jl +++ b/src/PyCall.jl @@ -5,7 +5,8 @@ module PyCall using Compat, VersionParsing export pycall, pyimport, pyimport_e, pybuiltin, PyObject, PyReverseDims, - PyPtr, pyincref, pydecref, pyversion, PyArray, PyArray_Info, + PyPtr, pyincref, pydecref, pyversion, + PyArray, PyArray_Info, PyBuffer, PyArrayFromBuffer, setdata!, ArrayFromBuffer, pyerr_check, pyerr_clear, pytype_query, PyAny, @pyimport, PyDict, pyisinstance, pywrap, pytypeof, pyeval, PyVector, pystring, pystr, pyrepr, pyraise, pytype_mapping, pygui, pygui_start, pygui_stop, diff --git a/src/numpy.jl b/src/numpy.jl index 616c89cc..38941387 100644 --- a/src/numpy.jl +++ b/src/numpy.jl @@ -226,54 +226,63 @@ PyReverseDims(a::AbstractArray) # to call the Python interface to do this, since the equivalent information # in NumPy's C API is only available via macros (or parsing structs). # [ Hopefully, this will be improved in a future NumPy version. ] - -mutable struct PyArray_Info - T::Type +struct PyArray_Info{T,N} native::Bool # native byte order? - sz::Vector{Int} - st::Vector{Int} # strides, in multiples of bytes! - data::Ptr{Cvoid} + sz::NTuple{N,Int} + st::NTuple{N,Int} # strides, in multiples of bytes! + data::Ptr{T} readonly::Bool +end - function PyArray_Info(a::PyObject) - ai = PyDict{AbstractString,PyObject}(a["__array_interface__"]) - typestr = convert(AbstractString, ai["typestr"]) - T = npy_typestrs[typestr[2:end]] - datatuple = convert(Tuple{Int,Bool}, ai["data"]) - sz = convert(Vector{Int}, ai["shape"]) - local st - try - st = isempty(sz) ? Int[] : convert(Vector{Int}, ai["strides"]) - catch - # default is C-order contiguous - st = similar(sz) - st[end] = sizeof(T) - for i = length(sz)-1:-1:1 - st[i] = st[i+1]*sz[i+1] - end +function PyArray_Info(a::PyObject) + ai = PyDict{String,PyObject,true}(a["__array_interface__"]) + typestr = convert(String, ai["typestr"]) + T = npy_typestrs[typestr[2:end]] + datatuple = convert(Tuple{Int,Bool}, ai["data"]) + sz = convert(Tuple{Vararg{Int}}, ai["shape"]) + N = length(sz) + st = if isempty(sz) + () + else + stq = get(ai, "strides", PyNULL()) + if stq.o == pynothing[] || stq.o == C_NULL + default_stride(sz, T) + else + convert(Tuple{Vararg{Int}}, stq) end - return new(T, - (ENDIAN_BOM == 0x04030201 && typestr[1] == '<') - || (ENDIAN_BOM == 0x01020304 && typestr[1] == '>') - || typestr[1] == '|', - sz, st, - convert(Ptr{Cvoid}, datatuple[1]), - datatuple[2]) end + return PyArray_Info{T,N}((ENDIAN_BOM == 0x04030201 && typestr[1] == '<') + || (ENDIAN_BOM == 0x01020304 && typestr[1] == '>') + || typestr[1] == '|', + sz, st, + convert(Ptr{Cvoid}, datatuple[1]), + datatuple[2]) end -aligned(i::PyArray_Info) = # FIXME: also check pointer alignment? - all(m -> m == 0, mod.(i.st, sizeof(i.T))) # strides divisible by elsize +aligned(i::PyArray_Info{T,N}) where {T,N} = # FIXME: also check pointer alignment? + all(m -> m == 0, mod.(i.st, sizeof(T))) # strides divisible by elsize + +eltype(i::PyArray_Info{T,N}) where {T,N} = T +ndims(i::PyArray_Info{T,N}) where {T,N} = N + +function default_stride(sz::NTuple{N, Int}, ::Type{T}) where {T,N} + stv = Vector{Int}(N) + stv[end] = sizeof(T) + for i = N-1:-1:1 + stv[i] = stv[i+1]*sz[i+1] + end + ntuple(i->stv[i], N) +end # whether a contiguous array in column-major (Fortran, Julia) order -function f_contiguous(T::Type, sz::Vector{Int}, st::Vector{Int}) +function f_contiguous(T::Type, sz::NTuple{N,Int}, st::NTuple{N,Int}) where N if prod(sz) == 1 return true end if st[1] != sizeof(T) return false end - for j = 2:length(st) + for j = 2:N if st[j] != st[j-1] * sz[j-1] return false end @@ -281,8 +290,11 @@ function f_contiguous(T::Type, sz::Vector{Int}, st::Vector{Int}) return true end -f_contiguous(i::PyArray_Info) = f_contiguous(i.T, i.sz, i.st) -c_contiguous(i::PyArray_Info) = f_contiguous(i.T, flipdim(i.sz,1), flipdim(i.st,1)) +f_contiguous(T::Type, sz::NTuple{N1,Int}, st::NTuple{N2,Int}) where {N1,N2} = + error("stride and size are different lengths, size: $sz, strides: $sz") + +f_contiguous(i::PyArray_Info{T,N}) where {T,N} = f_contiguous(T, i.sz, i.st) +c_contiguous(i::PyArray_Info{T,N}) where {T,N} = f_contiguous(T, reverse(i.sz), reverse(i.st)) ######################################################################### # PyArray: no-copy wrapper around NumPy ndarray @@ -305,7 +317,7 @@ mutable struct PyArray{T,N} <: AbstractArray{T,N} o::PyObject info::PyArray_Info dims::Dims - st::Vector{Int} + st::NTuple{N,Int} f_contig::Bool c_contig::Bool data::Ptr{T} @@ -315,7 +327,7 @@ mutable struct PyArray{T,N} <: AbstractArray{T,N} throw(ArgumentError("only NPY_ARRAY_ALIGNED arrays are supported")) elseif !info.native throw(ArgumentError("only native byte-order arrays are supported")) - elseif info.T != T + elseif eltype(info) != T throw(ArgumentError("inconsistent type in PyArray constructor")) elseif length(info.sz) != N || length(info.st) != N throw(ArgumentError("inconsistent ndims in PyArray constructor")) @@ -328,7 +340,7 @@ end function PyArray(o::PyObject) info = PyArray_Info(o) - return PyArray{info.T, length(info.sz)}(o, info) + return PyArray{eltype(info), length(info.sz)}(o, info) end size(a::PyArray) = a.dims @@ -336,6 +348,13 @@ ndims(a::PyArray{T,N}) where {T,N} = N similar(a::PyArray, T, dims::Dims) = Array{T}(undef, dims) +function setdata!{T,N}(a::PyArray{T,N}, o::PyObject, pybufinfo=PyBuffer()) + PyBuffer!(pybufinfo, o, PyBUF_ND_CONTIGUOUS) + dataptr = pybufinfo.buf.buf + a.data = reinterpret(Ptr{T}, dataptr) + a +end + function copy(a::PyArray{T,N}) where {T,N} if N > 1 && a.c_contig # equivalent to f_contig with reversed dims B = unsafe_wrap(Array, a.data, ntuple((n -> a.dims[N - n + 1]), N)) @@ -455,7 +474,7 @@ function convert(::Type{Array{T}}, o::PyObject) where T<:NPY_TYPES try info = PyArray_Info(o) try - copy(PyArray{T, length(info.sz)}(o, info)) # will check T == info.T + copy(PyArray{T, length(info.sz)}(o, info)) # will check T == eltype(info) catch return py2array(T, Array{pyany_toany(T)}(undef, info.sz...), o, 1, 1) end @@ -468,7 +487,7 @@ function convert(::Type{Array{T,N}}, o::PyObject) where {T<:NPY_TYPES,N} try info = PyArray_Info(o) try - copy(PyArray{T,N}(o, info)) # will check T == info.T and N == length(info.sz) + copy(PyArray{T,N}(o, info)) # will check T,N == eltype(info),ndims(info) catch nd = length(info.sz) if nd != N diff --git a/src/pybuffer.jl b/src/pybuffer.jl index 06295261..ffcfe12c 100644 --- a/src/pybuffer.jl +++ b/src/pybuffer.jl @@ -86,6 +86,9 @@ function Base.stride(b::PyBuffer, d::Integer) return Int(unsafe_load(b.buf.strides, d)) end +# Strides in bytes +Base.strides(b::PyBuffer) = ((stride(b,i) for i in 1:b.buf.ndim)...,) + # TODO change to `Ref{PyBuffer}` when 0.6 is dropped. iscontiguous(b::PyBuffer) = 1 == ccall((@pysym :PyBuffer_IsContiguous), Cint, @@ -93,7 +96,7 @@ iscontiguous(b::PyBuffer) = ############################################################################# # pybuffer constant values from Include/object.h -const PyBUF_MAX_NDIM = convert(Cint, 64) +const PyBUF_MAX_NDIM = convert(Cint, 64) # == 0x0040, and not in spec? const PyBUF_SIMPLE = convert(Cint, 0) const PyBUF_WRITABLE = convert(Cint, 0x0001) const PyBUF_FORMAT = convert(Cint, 0x0004) @@ -103,10 +106,14 @@ const PyBUF_C_CONTIGUOUS = convert(Cint, 0x0020) | PyBUF_STRIDES const PyBUF_F_CONTIGUOUS = convert(Cint, 0x0040) | PyBUF_STRIDES const PyBUF_ANY_CONTIGUOUS = convert(Cint, 0x0080) | PyBUF_STRIDES const PyBUF_INDIRECT = convert(Cint, 0x0100) | PyBUF_STRIDES +const PyBUF_ND_CONTIGUOUS = Cint(PyBUF_WRITABLE | PyBUF_FORMAT | PyBUF_ND | PyBUF_STRIDES | PyBUF_ANY_CONTIGUOUS) # construct a PyBuffer from a PyObject, if possible function PyBuffer(o::Union{PyObject,PyPtr}, flags=PyBUF_SIMPLE) - b = PyBuffer() + return PyBuffer!(PyBuffer(), o, flags) +end + +function PyBuffer!(b::PyBuffer, o::Union{PyObject,PyPtr}, flags=PyBUF_SIMPLE) # TODO change to `Ref{PyBuffer}` when 0.6 is dropped. @pycheckz ccall((@pysym :PyObject_GetBuffer), Cint, (PyPtr, Any, Cint), o, b, flags) @@ -154,4 +161,53 @@ function Base.write(io::IO, b::PyBuffer) end end +# ref: https://github.com/numpy/numpy/blob/v1.14.2/numpy/core/src/multiarray/buffer.c#L966 +const pybuf_typestrs = Dict{String,DataType}("?"=>Bool, + "b"=>Int8, "B"=>UInt8, + "h"=>Int16, "H"=>UInt16, + "i"=>Int32, "I"=>UInt32, + "l"=>Int64, "L"=>UInt64, + "q"=>Int128, "Q"=>UInt128, + "e"=>Float16, "f"=>Float32, + "d"=>Float64, "g"=>Void, # Float128? + "c8"=>ComplexF32, "c16"=>ComplexF64,) + # "O"=>PyPtr, "O$(div(Sys.WORD_SIZE,8))"=>PyPtr) + +get_typestr(pybuf::PyBuffer) = unsafe_string(convert(Ptr{UInt8}, pybuf.buf.format)) + +function array_info(pybuf::PyBuffer) + typestr = get_typestr(pybuf) + native_byteorder = length(typestr) == 1 || + (ENDIAN_BOM == 0x04030201 && typestr[1] == '<') || + (ENDIAN_BOM == 0x01020304 && typestr[1] == '>') + pybuf_typestrs[typestr[end:end]], native_byteorder +end + +function PyArrayInfoFromBuffer(o::PyObject) + pybuf = PyBuffer(o, PyBUF_ND_CONTIGUOUS) + # XXX pyincref buffer? and add a finalizer to the array that calls pydecref? + T, native_byteorder = array_info(pybuf) + sz = size(pybuf) + N = length(sz) + strd = strides(pybuf) + isreadonly = pybuf.buf.readonly==1 + return PyArray_Info{T,N}(native_byteorder, sz, strd, pybuf.buf.buf, isreadonly) +end + +function PyArrayFromBuffer(o::PyObject) + info = PyArrayInfoFromBuffer(o::PyObject) + PyArray{eltype(info), length(info.sz)}(o, info) +end + +function ArrayFromBuffer(o::PyObject) + pybuf = PyBuffer(o, PyBUF_ND_CONTIGUOUS) + # XXX pyincref buffer? and add a finalizer to the array that calls pydecref? + T, native_byteorder = array_info(pybuf) + !native_byteorder && error("Only native endian format supported, typestr: '$(get_typestr(pybuf))'") + T == Void && error("Array datatype '$(get_typestr(pybuf))' not supported") + # TODO more checks on strides etc + arr = unsafe_wrap(Array, convert(Ptr{T}, pybuf.buf.buf), size(pybuf), false) + f_contiguous(T, strides(pybuf), size(pybuf)) ? arr : PermutedDimsArray(arr, (pybuf.buf.ndim:-1:1)) +end + ############################################################################# diff --git a/test/runtests.jl b/test/runtests.jl index 051619d2..5819437b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -558,3 +558,5 @@ end @test_throws ErrorException @pywith IgnoreError(false) error() @test (@pywith IgnoreError(true) error(); true) end + +include("testpybuffer.jl") \ No newline at end of file diff --git a/test/testpybuffer.jl b/test/testpybuffer.jl new file mode 100644 index 00000000..f54d454d --- /dev/null +++ b/test/testpybuffer.jl @@ -0,0 +1,25 @@ +using Compat.Test, PyCall, Compat + +pyutf8(s::PyObject) = pycall(s["encode"], PyObject, "utf-8") +pyutf8(s::String) = pyutf8(PyObject(s)) + +const np = pyimport("numpy") + +@testset "PyBuffer" begin + @testset "String Buffers" begin + b = PyCall.PyBuffer(pyutf8("test string")) + @test ndims(b) == 1 + @test (length(b),) == (length("test string"),) == (size(b, 1),) == size(b) + @test stride(b, 1) == 1 + @test PyCall.iscontiguous(b) == true + end + + @testset "Non-native-endian" begin + wrong_endian_str = ENDIAN_BOM == 0x01020304 ? "<" : ">" + wrong_endian_arr = + pycall(np["ndarray"], PyObject, 2; buffer=UInt8[0,1,3,2], + dtype=wrong_endian_str*"i2") + # Not supported, so throws + @test_throws ErrorException ArrayFromBuffer(wrong_endian_arr) + end +end From 955243c9e0d222f52b9db32eed16bded710eb14b Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Sun, 8 Apr 2018 02:38:29 +1000 Subject: [PATCH 02/16] Add isbuftype and use in pysequence_query --- src/PyCall.jl | 2 +- src/conversions.jl | 6 ++---- src/pybuffer.jl | 21 +++++++++++++++++++++ test/testpybuffer.jl | 7 +++++++ 4 files changed, 31 insertions(+), 5 deletions(-) diff --git a/src/PyCall.jl b/src/PyCall.jl index 9cb0d325..4453bef3 100644 --- a/src/PyCall.jl +++ b/src/PyCall.jl @@ -6,7 +6,7 @@ using Compat, VersionParsing export pycall, pyimport, pyimport_e, pybuiltin, PyObject, PyReverseDims, PyPtr, pyincref, pydecref, pyversion, - PyArray, PyArray_Info, PyBuffer, PyArrayFromBuffer, setdata!, ArrayFromBuffer, + PyArray, PyArray_Info, PyBuffer, PyArrayFromBuffer, setdata!, ArrayFromBuffer, isbuftype, pyerr_check, pyerr_clear, pytype_query, PyAny, @pyimport, PyDict, pyisinstance, pywrap, pytypeof, pyeval, PyVector, pystring, pystr, pyrepr, pyraise, pytype_mapping, pygui, pygui_start, pygui_stop, diff --git a/src/conversions.jl b/src/conversions.jl index af224b24..ec31f7bc 100644 --- a/src/conversions.jl +++ b/src/conversions.jl @@ -749,13 +749,11 @@ function pysequence_query(o::PyObject) return AbstractRange elseif ispybytearray(o) return Vector{UInt8} - elseif !haskey(o, "__array_interface__") + elseif !isbuftype(o) # only handle PyList for now return pyisinstance(o, @pyglobalobj :PyList_Type) ? Array : Union{} else - otypestr = get(o["__array_interface__"], PyObject, "typestr") - typestr = convert(AbstractString, otypestr) # Could this just be String now? - T = npy_typestrs[typestr[2:end]] + T, native_byteorder = array_format(o) if T == PyPtr T = PyObject end diff --git a/src/pybuffer.jl b/src/pybuffer.jl index ffcfe12c..c31765ec 100644 --- a/src/pybuffer.jl +++ b/src/pybuffer.jl @@ -120,6 +120,27 @@ function PyBuffer!(b::PyBuffer, o::Union{PyObject,PyPtr}, flags=PyBUF_SIMPLE) return b end +""" +`isbuftype(b::PyBuffer, o::Union{PyObject,PyPtr}, flags=PyBUF_ND_CONTIGUOUS)` +Returns true if the python object `o` supports the buffer protocol. False if not. +""" +function isbuftype(o::Union{PyObject,PyPtr}) + # PyObject_CheckBuffer is defined in a header file here: https://github.com/python/cpython/blob/ef5ce884a41c8553a7eff66ebace908c1dcc1f89/Include/abstract.h#L510 + # so we can't access it easily. It basically just checks if PyObject_GetBuffer exists + # So we'll just try call PyObject_GetBuffer and check for success/failure + b = PyBuffer() + ret = ccall((@pysym :PyObject_GetBuffer), Cint, + (PyPtr, Any, Cint), o, b, PyBUF_ND_CONTIGUOUS) + if ret != 0 + pyerr_clear() + else + # handle pointer types + T, native_byteorder = array_format(b) + T <: Ptr && (ret = 1) + end + return ret == 0 +end + ############################################################################# # recursive function to write buffer dimension by dimension, starting at diff --git a/test/testpybuffer.jl b/test/testpybuffer.jl index f54d454d..483d6653 100644 --- a/test/testpybuffer.jl +++ b/test/testpybuffer.jl @@ -22,4 +22,11 @@ const np = pyimport("numpy") # Not supported, so throws @test_throws ErrorException ArrayFromBuffer(wrong_endian_arr) end + + @testset "isbuftype" begin + @test isbuftype(PyObject(0)) == false + @test isbuftype(listpyo((1.0:10.0...))) == false + @test isbuftype(arrpyo("d", 1.0:10.0)) == true + @test isbuftype(PyObject([1:10...])) == true + end end From 7fde0ec79c370c0ecca4d43564ee3f63fe49c9d6 Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Sun, 8 Apr 2018 02:44:14 +1000 Subject: [PATCH 03/16] Increase robustness in parsing buffer format string --- src/numpy.jl | 3 +- src/pybuffer.jl | 69 ++++++++++++++++++++++++++++++++++---------- test/runtests.jl | 10 +++---- test/testpybuffer.jl | 10 +++++-- 4 files changed, 69 insertions(+), 23 deletions(-) diff --git a/src/numpy.jl b/src/numpy.jl index 38941387..16c3b192 100644 --- a/src/numpy.jl +++ b/src/numpy.jl @@ -276,7 +276,8 @@ end # whether a contiguous array in column-major (Fortran, Julia) order function f_contiguous(T::Type, sz::NTuple{N,Int}, st::NTuple{N,Int}) where N - if prod(sz) == 1 + if prod(sz) == 1 || length(sz) == 1 + # 0 or 1-dim arrays should default to f-contiguous in julia return true end if st[1] != sizeof(T) diff --git a/src/pybuffer.jl b/src/pybuffer.jl index c31765ec..a2ba9095 100644 --- a/src/pybuffer.jl +++ b/src/pybuffer.jl @@ -96,7 +96,6 @@ iscontiguous(b::PyBuffer) = ############################################################################# # pybuffer constant values from Include/object.h -const PyBUF_MAX_NDIM = convert(Cint, 64) # == 0x0040, and not in spec? const PyBUF_SIMPLE = convert(Cint, 0) const PyBUF_WRITABLE = convert(Cint, 0x0001) const PyBUF_FORMAT = convert(Cint, 0x0004) @@ -183,31 +182,71 @@ function Base.write(io::IO, b::PyBuffer) end # ref: https://github.com/numpy/numpy/blob/v1.14.2/numpy/core/src/multiarray/buffer.c#L966 -const pybuf_typestrs = Dict{String,DataType}("?"=>Bool, + +const standard_typestrs = Dict{String,DataType}( + "?"=>Bool, "P"=>Ptr{Void}, "b"=>Int8, "B"=>UInt8, "h"=>Int16, "H"=>UInt16, "i"=>Int32, "I"=>UInt32, - "l"=>Int64, "L"=>UInt64, - "q"=>Int128, "Q"=>UInt128, + "l"=>Int32, "L"=>UInt32, + "q"=>Int64, "Q"=>UInt64, "e"=>Float16, "f"=>Float32, "d"=>Float64, "g"=>Void, # Float128? - "c8"=>ComplexF32, "c16"=>ComplexF64,) - # "O"=>PyPtr, "O$(div(Sys.WORD_SIZE,8))"=>PyPtr) + "Z8"=>ComplexF32, "Z16"=>ComplexF64, + "Zf"=>ComplexF32, "Zd"=>ComplexF64) + +const native_typestrs = Dict{String,DataType}( + "?"=>Bool, "P"=>Ptr{Void}, + "b"=>Int8, "B"=>UInt8, + "h"=>Cshort, "H"=>Cushort, + "i"=>Cint, "I"=>Cuint, + "l"=>Clong, "L"=>Culong, + "q"=>Clonglong, "Q"=>Culonglong, + "e"=>Float16, "f"=>Cfloat, + "d"=>Cdouble, "g"=>Void, # Float128? + "Z8"=>ComplexF32, "Z16"=>ComplexF64, + "Zf"=>ComplexF32, "Zd"=>ComplexF64) -get_typestr(pybuf::PyBuffer) = unsafe_string(convert(Ptr{UInt8}, pybuf.buf.format)) +get_format_str(pybuf::PyBuffer) = unsafe_string(convert(Ptr{UInt8}, pybuf.buf.format)) -function array_info(pybuf::PyBuffer) - typestr = get_typestr(pybuf) - native_byteorder = length(typestr) == 1 || - (ENDIAN_BOM == 0x04030201 && typestr[1] == '<') || - (ENDIAN_BOM == 0x01020304 && typestr[1] == '>') - pybuf_typestrs[typestr[end:end]], native_byteorder +function array_format(pybuf::PyBuffer) + # a NULL-terminated format-string ... indicating what is in each element of memory. + # TODO: handle more cases: https://www.python.org/dev/peps/pep-3118/#additions-to-the-struct-string-syntax + # refs: https://github.com/numpy/numpy/blob/v1.14.2/numpy/core/src/multiarray/buffer.c#L966 + # https://github.com/numpy/numpy/blob/v1.14.2/numpy/core/_internal.py#L490 + # + + # "NULL implies standard unsigned bytes ("B")" --pep 3118 + pybuf.buf.format == C_NULL && return UInt8, true + + fmt_str = get_format_str(pybuf) + native_byteorder = true + type_start_idx = 1 + typestrs = standard_typestrs + if length(fmt_str) > 1 + type_start_idx = 2 + if fmt_str[1] == '=' + elseif fmt_str[1] == '<' + native_byteorder = ENDIAN_BOM == 0x04030201 + elseif fmt_str[1] == '>' || fmt_str =='!' + native_byteorder = ENDIAN_BOM == 0x01020304 + elseif fmt_str[1] == '@' || fmt_str[1] == '^' + typestrs = native_typestrs + elseif fmt_str[1] == "Z" + type_start_idx = 1 + else + error("Unsupported format string: \"$fmt_str\"") + end + end + typestrs[fmt_str[type_start_idx:end]], native_byteorder end +array_format(o::PyObject) = array_format(PyBuffer(o, PyBUF_ND_CONTIGUOUS)) + function PyArrayInfoFromBuffer(o::PyObject) + # n.b. the pydecref(::PyBuffer) finalizer handles releasing the PyBuffer pybuf = PyBuffer(o, PyBUF_ND_CONTIGUOUS) - # XXX pyincref buffer? and add a finalizer to the array that calls pydecref? - T, native_byteorder = array_info(pybuf) + T, native_byteorder = array_format(pybuf) sz = size(pybuf) N = length(sz) strd = strides(pybuf) diff --git a/test/runtests.jl b/test/runtests.jl index 5819437b..a7d3b282 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -263,11 +263,11 @@ pymodule_exists(s::AbstractString) = !ispynull(pyimport_e(s)) @test o[:real] == 1 end - # []-based sequence access - let a1=[5,8,6], a2=rand(3,4), a3=rand(3,4,5), o1=PyObject(a1), o2=PyObject(a2), o3=PyObject(a3) + @testset "[]-based sequence access" begin + a1=[5,8,6]; a2=rand(3,4); a3=rand(3,4,5); o1=PyObject(a1); o2=PyObject(a2); o3=PyObject(a3) @test [o1[i] for i in eachindex(a1)] == a1 @test [o1[end-(i-1)] for i in eachindex(a1)] == reverse(a1) - @test o2[1] == collect(a2[1,:]) + @test all(o2[1] == collect(a2[1,:])) @test length(o1) == length(o2) == length(o3) == 3 o1[end-1] = 7 @test o1[2] == 7 @@ -277,7 +277,7 @@ pymodule_exists(s::AbstractString) = !ispynull(pyimport_e(s)) if PyCall.npy_initialized @test [o2[i,j] for i=1:3, j=1:4] == a2 @test [o3[i,j,k] for i=1:3, j=1:4, k=1:5] == a3 - @test o3[2,3] == collect(a3[2,3,:]) + @test all(o3[2,3] == collect(a3[2,3,:])) o2[2,3] = 8 @test o2[2,3] == 8 o3[2,3,4] = 9 @@ -559,4 +559,4 @@ end @test (@pywith IgnoreError(true) error(); true) end -include("testpybuffer.jl") \ No newline at end of file +include("testpybuffer.jl") diff --git a/test/testpybuffer.jl b/test/testpybuffer.jl index 483d6653..7d11c3d6 100644 --- a/test/testpybuffer.jl +++ b/test/testpybuffer.jl @@ -3,9 +3,15 @@ using Compat.Test, PyCall, Compat pyutf8(s::PyObject) = pycall(s["encode"], PyObject, "utf-8") pyutf8(s::String) = pyutf8(PyObject(s)) -const np = pyimport("numpy") - @testset "PyBuffer" begin + np = pyimport("numpy") + # arrpyo = pyimport("array")["array"]=>PyObject + # listpyo = pybuiltin("list")=>PyObject + arrpy = pyimport("array")["array"] + listpy = pybuiltin("list") + arrpyo(args...) = pycall(arrpy, PyObject, args...) + listpyo(args...) = pycall(listpy, PyObject, args...) + @testset "String Buffers" begin b = PyCall.PyBuffer(pyutf8("test string")) @test ndims(b) == 1 From b8a9dcde68093125d73405bd99d2dde9bd440ca9 Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Sun, 8 Apr 2018 02:44:34 +1000 Subject: [PATCH 04/16] ArrayFromBuffer Improvements --- src/pybuffer.jl | 12 +++++++----- test/testpybuffer.jl | 13 +++++++++++++ 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/src/pybuffer.jl b/src/pybuffer.jl index a2ba9095..f2b4a754 100644 --- a/src/pybuffer.jl +++ b/src/pybuffer.jl @@ -260,14 +260,16 @@ function PyArrayFromBuffer(o::PyObject) end function ArrayFromBuffer(o::PyObject) + # n.b. the pydecref(::PyBuffer) finalizer handles releasing the PyBuffer pybuf = PyBuffer(o, PyBUF_ND_CONTIGUOUS) - # XXX pyincref buffer? and add a finalizer to the array that calls pydecref? - T, native_byteorder = array_info(pybuf) - !native_byteorder && error("Only native endian format supported, typestr: '$(get_typestr(pybuf))'") - T == Void && error("Array datatype '$(get_typestr(pybuf))' not supported") + T, native_byteorder = array_format(pybuf) + !native_byteorder && error("Only native endian format supported, format string: '$(get_format_str(pybuf))'") + T == Void && error("Array datatype '$(get_format_str(pybuf))' not supported") # TODO more checks on strides etc arr = unsafe_wrap(Array, convert(Ptr{T}, pybuf.buf.buf), size(pybuf), false) - f_contiguous(T, strides(pybuf), size(pybuf)) ? arr : PermutedDimsArray(arr, (pybuf.buf.ndim:-1:1)) + !f_contiguous(T, size(pybuf), strides(pybuf)) && + (arr = PermutedDimsArray(arr, (pybuf.buf.ndim:-1:1))) + return arr end ############################################################################# diff --git a/test/testpybuffer.jl b/test/testpybuffer.jl index 7d11c3d6..3d6c5bb5 100644 --- a/test/testpybuffer.jl +++ b/test/testpybuffer.jl @@ -1,4 +1,5 @@ using Compat.Test, PyCall, Compat +using PyCall: f_contiguous, PyBUF_ND_CONTIGUOUS, array_format pyutf8(s::PyObject) = pycall(s["encode"], PyObject, "utf-8") pyutf8(s::String) = pyutf8(PyObject(s)) @@ -29,6 +30,18 @@ pyutf8(s::String) = pyutf8(PyObject(s)) @test_throws ErrorException ArrayFromBuffer(wrong_endian_arr) end + @testset "ArrayFromBuffer" begin + ao = arrpyo("d", 1.0:10.0) + pybuf = PyBuffer(ao, PyBUF_ND_CONTIGUOUS) + T, native_byteorder = array_format(pybuf) + @test T == Float64 + @test native_byteorder == true + @test size(pybuf) == (10,) + @test strides(pybuf) == (sizeof(T),) + @test !(ArrayFromBuffer(ao) isa PermutedDimsArray) + @test ArrayFromBuffer(ao) isa Array + end + @testset "isbuftype" begin @test isbuftype(PyObject(0)) == false @test isbuftype(listpyo((1.0:10.0...))) == false From a0082c514c23432931665bc801e74d0f4a3a62fa Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Mon, 9 Apr 2018 01:20:40 +1000 Subject: [PATCH 05/16] ArgumentError for non-native endian and typestrs_native --- src/pybuffer.jl | 11 ++++++++--- test/testpybuffer.jl | 3 ++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/pybuffer.jl b/src/pybuffer.jl index f2b4a754..f6beed92 100644 --- a/src/pybuffer.jl +++ b/src/pybuffer.jl @@ -207,6 +207,9 @@ const native_typestrs = Dict{String,DataType}( "Z8"=>ComplexF32, "Z16"=>ComplexF64, "Zf"=>ComplexF32, "Zd"=>ComplexF64) +const typestrs_native = + Dict{DataType, String}(zip(values(native_typestrs), keys(native_typestrs))) + get_format_str(pybuf::PyBuffer) = unsafe_string(convert(Ptr{UInt8}, pybuf.buf.format)) function array_format(pybuf::PyBuffer) @@ -214,7 +217,7 @@ function array_format(pybuf::PyBuffer) # TODO: handle more cases: https://www.python.org/dev/peps/pep-3118/#additions-to-the-struct-string-syntax # refs: https://github.com/numpy/numpy/blob/v1.14.2/numpy/core/src/multiarray/buffer.c#L966 # https://github.com/numpy/numpy/blob/v1.14.2/numpy/core/_internal.py#L490 - # + # https://docs.python.org/2/library/struct.html#byte-order-size-and-alignment # "NULL implies standard unsigned bytes ("B")" --pep 3118 pybuf.buf.format == C_NULL && return UInt8, true @@ -263,8 +266,10 @@ function ArrayFromBuffer(o::PyObject) # n.b. the pydecref(::PyBuffer) finalizer handles releasing the PyBuffer pybuf = PyBuffer(o, PyBUF_ND_CONTIGUOUS) T, native_byteorder = array_format(pybuf) - !native_byteorder && error("Only native endian format supported, format string: '$(get_format_str(pybuf))'") - T == Void && error("Array datatype '$(get_format_str(pybuf))' not supported") + !native_byteorder && throw(ArgumentError( + "Only native endian format supported, format string: '$(get_format_str(pybuf))'")) + T == Void && throw(ArgumentError( + "Array datatype '$(get_format_str(pybuf))' not supported")) # TODO more checks on strides etc arr = unsafe_wrap(Array, convert(Ptr{T}, pybuf.buf.buf), size(pybuf), false) !f_contiguous(T, size(pybuf), strides(pybuf)) && diff --git a/test/testpybuffer.jl b/test/testpybuffer.jl index 3d6c5bb5..0ad3696a 100644 --- a/test/testpybuffer.jl +++ b/test/testpybuffer.jl @@ -27,7 +27,8 @@ pyutf8(s::String) = pyutf8(PyObject(s)) pycall(np["ndarray"], PyObject, 2; buffer=UInt8[0,1,3,2], dtype=wrong_endian_str*"i2") # Not supported, so throws - @test_throws ErrorException ArrayFromBuffer(wrong_endian_arr) + @test_throws ArgumentError ArrayFromBuffer(wrong_endian_arr) + @test_throws ArgumentError PyArrayFromBuffer(wrong_endian_arr) end @testset "ArrayFromBuffer" begin From d67e19ededce708a6f3efe1e8a22e7f2fe6d9080 Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Tue, 17 Apr 2018 00:02:27 +1000 Subject: [PATCH 06/16] Move PyArray related things to their own file rename ArrayFromBuffer to NoCopyArray, and make indexing of NoCopyArray match py indexing for row major arrays too --- benchmarks/arrayperf.jl | 10 +- src/PyCall.jl | 3 +- src/numpy.jl | 339 +--------------------------------------- src/pyarray.jl | 317 +++++++++++++++++++++++++++++++++++++ src/pybuffer.jl | 33 ---- test/testpybuffer.jl | 62 ++++++-- 6 files changed, 382 insertions(+), 382 deletions(-) create mode 100644 src/pyarray.jl diff --git a/benchmarks/arrayperf.jl b/benchmarks/arrayperf.jl index e805b76e..b5a46f06 100644 --- a/benchmarks/arrayperf.jl +++ b/benchmarks/arrayperf.jl @@ -1,5 +1,5 @@ using PyCall, BenchmarkTools, DataStructures -using PyCall: PyArrayInfoFromBuffer +using PyCall: PyArray_Info results = OrderedDict{String,Any}() @@ -22,11 +22,11 @@ let println("convert_pyarr $arr_size:\n"); display(results["convert_pyarr$arr_size"]) println("--------------------------------------------------") - results["PyArrayInfoFromBuffer$arr_size"] = @benchmark $PyArrayInfoFromBuffer($pyo_arr) - println("PyArrayInfoFromBuffer $arr_size:\n"); display(results["PyArrayInfoFromBuffer$arr_size"]) + results["PyArray_Info$arr_size"] = @benchmark $PyArray_Info($pyo_arr) + println("PyArray_Info $arr_size:\n"); display(results["PyArray_Info$arr_size"]) println("--------------------------------------------------") - results["convert_pyarrbuf$arr_size"] = @benchmark $PyArrayFromBuffer($pyo_arr) + results["convert_pyarrbuf$arr_size"] = @benchmark $PyArray($pyo_arr) println("convert_pyarrbuf $arr_size:\n"); display(results["convert_pyarrbuf$arr_size"]) println("--------------------------------------------------") @@ -34,7 +34,7 @@ let println("convert_arr $arr_size:\n"); display(results["convert_arr$arr_size"]) println("--------------------------------------------------") - results["convert_arrbuf$arr_size"] = @benchmark $ArrayFromBuffer($pyo_arr) + results["convert_arrbuf$arr_size"] = @benchmark $NoCopyArray($pyo_arr) println("convert_arrbuf $arr_size:\n"); display(results["convert_arrbuf$arr_size"]) println("--------------------------------------------------") diff --git a/src/PyCall.jl b/src/PyCall.jl index 4453bef3..57f219bc 100644 --- a/src/PyCall.jl +++ b/src/PyCall.jl @@ -6,7 +6,7 @@ using Compat, VersionParsing export pycall, pyimport, pyimport_e, pybuiltin, PyObject, PyReverseDims, PyPtr, pyincref, pydecref, pyversion, - PyArray, PyArray_Info, PyBuffer, PyArrayFromBuffer, setdata!, ArrayFromBuffer, isbuftype, + PyArray, PyArray_Info, PyBuffer, setdata!, NoCopyArray, isbuftype, pyerr_check, pyerr_clear, pytype_query, PyAny, @pyimport, PyDict, pyisinstance, pywrap, pytypeof, pyeval, PyVector, pystring, pystr, pyrepr, pyraise, pytype_mapping, pygui, pygui_start, pygui_stop, @@ -174,6 +174,7 @@ PyObject(o::PyPtr, keep::Any) = pyembed(PyObject(o), keep) const TypeTuple = Union{Type,NTuple{N, Type}} where {N} include("pybuffer.jl") +include("pyarray.jl") include("conversions.jl") include("pytype.jl") include("pyiterator.jl") diff --git a/src/numpy.jl b/src/numpy.jl index 16c3b192..e41fd1a2 100644 --- a/src/numpy.jl +++ b/src/numpy.jl @@ -102,7 +102,7 @@ end # the values of these seem to have been stable for some time, and # the NumPy developers seem to have some awareness of binary compatibility -# NPY_TYPES: +# NumPy Types: const NPY_BOOL = Int32(0) const NPY_BYTE = Int32(1) @@ -127,26 +127,8 @@ const NPY_UNICODE = Int32(19) const NPY_VOID = Int32(20) const NPY_HALF = Int32(23) -# NPY_ORDER: -const NPY_ANYORDER = Int32(-1) -const NPY_CORDER = Int32(0) -const NPY_FORTRANORDER = Int32(1) - -# flags: -const NPY_ARRAY_C_CONTIGUOUS = Int32(1) -const NPY_ARRAY_F_CONTIGUOUS = Int32(2) -const NPY_ARRAY_ALIGNED = Int32(0x0100) -const NPY_ARRAY_WRITEABLE = Int32(0x0400) -const NPY_ARRAY_OWNDATA = Int32(0x0004) -const NPY_ARRAY_ENSURECOPY = Int32(0x0020) -const NPY_ARRAY_ENSUREARRAY = Int32(0x0040) -const NPY_ARRAY_FORCECAST = Int32(0x0010) -const NPY_ARRAY_UPDATEIFCOPY = Int32(0x1000) -const NPY_ARRAY_NOTSWAPPED = Int32(0x0200) -const NPY_ARRAY_ELEMENTSTRIDES = Int32(0x0080) - ######################################################################### -# conversion from Julia types to NPY_TYPES constant +# conversion from Julia types to NumPy types npy_type(::Type{Bool}) = NPY_BOOL npy_type(::Type{Int8}) = NPY_BYTE @@ -164,18 +146,9 @@ npy_type(::Type{ComplexF32}) = NPY_CFLOAT npy_type(::Type{ComplexF64}) = NPY_CDOUBLE npy_type(::Type{PyPtr}) = NPY_OBJECT -const NPY_TYPES = Union{Bool,Int8,UInt8,Int16,UInt16,Int32,UInt32,Int64,UInt64,Float16,Float32,Float64,ComplexF32,ComplexF64,PyPtr} - -# conversions from __array_interface__ type strings to supported Julia types -const npy_typestrs = Dict( "b1"=>Bool, - "i1"=>Int8, "u1"=>UInt8, - "i2"=>Int16, "u2"=>UInt16, - "i4"=>Int32, "u4"=>UInt32, - "i8"=>Int64, "u8"=>UInt64, - "f2"=>Float16, "f4"=>Float32, - "f8"=>Float64, "c8"=>ComplexF32, - "c16"=>ComplexF64, "O"=>PyPtr, - "O$(div(Sys.WORD_SIZE,8))"=>PyPtr) +# flags: +const NPY_ARRAY_ALIGNED = Int32(0x0100) +const NPY_ARRAY_WRITEABLE = Int32(0x0400) ######################################################################### # no-copy conversion of Julia arrays to NumPy arrays. @@ -185,7 +158,7 @@ const npy_typestrs = Dict( "b1"=>Bool, # dimensions. For example, although NumPy works with both row-major and # column-major data, some Python libraries like OpenCV seem to require # row-major data (the default in NumPy). In such cases, use PyReverseDims(array) -function NpyArray(a::StridedArray{T}, revdims::Bool) where T<:NPY_TYPES +function NpyArray(a::StridedArray{T}, revdims::Bool) where T<:PYARR_TYPES @npyinitialize size_a = revdims ? reverse(size(a)) : size(a) strides_a = revdims ? reverse(strides(a)) : strides(a) @@ -199,7 +172,7 @@ function NpyArray(a::StridedArray{T}, revdims::Bool) where T<:NPY_TYPES return PyObject(p, a) end -function PyObject(a::StridedArray{T}) where T<:NPY_TYPES +function PyObject(a::StridedArray{T}) where T<:PYARR_TYPES try return NpyArray(a, false) catch @@ -207,7 +180,7 @@ function PyObject(a::StridedArray{T}) where T<:NPY_TYPES end end -PyReverseDims(a::StridedArray{T}) where {T<:NPY_TYPES} = NpyArray(a, true) +PyReverseDims(a::StridedArray{T}) where {T<:PYARR_TYPES} = NpyArray(a, true) PyReverseDims(a::BitArray) = PyReverseDims(Array(a)) """ @@ -222,299 +195,3 @@ libraries that expect row-major data. PyReverseDims(a::AbstractArray) ######################################################################### -# Extract shape and other information about a NumPy array. We need -# to call the Python interface to do this, since the equivalent information -# in NumPy's C API is only available via macros (or parsing structs). -# [ Hopefully, this will be improved in a future NumPy version. ] -struct PyArray_Info{T,N} - native::Bool # native byte order? - sz::NTuple{N,Int} - st::NTuple{N,Int} # strides, in multiples of bytes! - data::Ptr{T} - readonly::Bool -end - -function PyArray_Info(a::PyObject) - ai = PyDict{String,PyObject,true}(a["__array_interface__"]) - typestr = convert(String, ai["typestr"]) - T = npy_typestrs[typestr[2:end]] - datatuple = convert(Tuple{Int,Bool}, ai["data"]) - sz = convert(Tuple{Vararg{Int}}, ai["shape"]) - N = length(sz) - st = if isempty(sz) - () - else - stq = get(ai, "strides", PyNULL()) - if stq.o == pynothing[] || stq.o == C_NULL - default_stride(sz, T) - else - convert(Tuple{Vararg{Int}}, stq) - end - end - return PyArray_Info{T,N}((ENDIAN_BOM == 0x04030201 && typestr[1] == '<') - || (ENDIAN_BOM == 0x01020304 && typestr[1] == '>') - || typestr[1] == '|', - sz, st, - convert(Ptr{Cvoid}, datatuple[1]), - datatuple[2]) -end - -aligned(i::PyArray_Info{T,N}) where {T,N} = # FIXME: also check pointer alignment? - all(m -> m == 0, mod.(i.st, sizeof(T))) # strides divisible by elsize - -eltype(i::PyArray_Info{T,N}) where {T,N} = T -ndims(i::PyArray_Info{T,N}) where {T,N} = N - -function default_stride(sz::NTuple{N, Int}, ::Type{T}) where {T,N} - stv = Vector{Int}(N) - stv[end] = sizeof(T) - for i = N-1:-1:1 - stv[i] = stv[i+1]*sz[i+1] - end - ntuple(i->stv[i], N) -end - -# whether a contiguous array in column-major (Fortran, Julia) order -function f_contiguous(T::Type, sz::NTuple{N,Int}, st::NTuple{N,Int}) where N - if prod(sz) == 1 || length(sz) == 1 - # 0 or 1-dim arrays should default to f-contiguous in julia - return true - end - if st[1] != sizeof(T) - return false - end - for j = 2:N - if st[j] != st[j-1] * sz[j-1] - return false - end - end - return true -end - -f_contiguous(T::Type, sz::NTuple{N1,Int}, st::NTuple{N2,Int}) where {N1,N2} = - error("stride and size are different lengths, size: $sz, strides: $sz") - -f_contiguous(i::PyArray_Info{T,N}) where {T,N} = f_contiguous(T, i.sz, i.st) -c_contiguous(i::PyArray_Info{T,N}) where {T,N} = f_contiguous(T, reverse(i.sz), reverse(i.st)) - -######################################################################### -# PyArray: no-copy wrapper around NumPy ndarray -# -# Hopefully, in the future this can be a subclass of StridedArray (see -# Julia issue #2345), which will allow it to be used with most Julia -# functions, but that is not possible at the moment. So, to use this -# with Julia linalg functions etcetera a copy is still required. - -""" - PyArray(o::PyObject) - -This converts an `ndarray` object `o` to a PyArray. - -This implements a nocopy wrapper to a NumPy array (currently of only numeric types only). - -If you are using `pycall` and the function returns an `ndarray`, you can use `PyArray` as the return type to directly receive a `PyArray`. -""" -mutable struct PyArray{T,N} <: AbstractArray{T,N} - o::PyObject - info::PyArray_Info - dims::Dims - st::NTuple{N,Int} - f_contig::Bool - c_contig::Bool - data::Ptr{T} - - function PyArray{T,N}(o::PyObject, info::PyArray_Info) where {T,N} - if !aligned(info) - throw(ArgumentError("only NPY_ARRAY_ALIGNED arrays are supported")) - elseif !info.native - throw(ArgumentError("only native byte-order arrays are supported")) - elseif eltype(info) != T - throw(ArgumentError("inconsistent type in PyArray constructor")) - elseif length(info.sz) != N || length(info.st) != N - throw(ArgumentError("inconsistent ndims in PyArray constructor")) - end - return new{T,N}(o, info, tuple(info.sz...), div.(info.st, sizeof(T)), - f_contiguous(info), c_contiguous(info), - convert(Ptr{T}, info.data)) - end -end - -function PyArray(o::PyObject) - info = PyArray_Info(o) - return PyArray{eltype(info), length(info.sz)}(o, info) -end - -size(a::PyArray) = a.dims -ndims(a::PyArray{T,N}) where {T,N} = N - -similar(a::PyArray, T, dims::Dims) = Array{T}(undef, dims) - -function setdata!{T,N}(a::PyArray{T,N}, o::PyObject, pybufinfo=PyBuffer()) - PyBuffer!(pybufinfo, o, PyBUF_ND_CONTIGUOUS) - dataptr = pybufinfo.buf.buf - a.data = reinterpret(Ptr{T}, dataptr) - a -end - -function copy(a::PyArray{T,N}) where {T,N} - if N > 1 && a.c_contig # equivalent to f_contig with reversed dims - B = unsafe_wrap(Array, a.data, ntuple((n -> a.dims[N - n + 1]), N)) - return permutedims(B, (N:-1:1)) - end - A = Array{T}(undef, a.dims) - if a.f_contig - ccall(:memcpy, Cvoid, (Ptr{T}, Ptr{T}, Int), A, a, sizeof(T)*length(a)) - return A - else - return copyto!(A, a) - end -end - -# TODO: need to do bounds-checking of these indices! - -getindex(a::PyArray{T,0}) where {T} = unsafe_load(a.data) -getindex(a::PyArray{T,1}, i::Integer) where {T} = unsafe_load(a.data, 1 + (i-1)*a.st[1]) - -getindex(a::PyArray{T,2}, i::Integer, j::Integer) where {T} = - unsafe_load(a.data, 1 + (i-1)*a.st[1] + (j-1)*a.st[2]) - -function getindex(a::PyArray, i::Integer) - if a.f_contig - return unsafe_load(a.data, i) - else - return a[ind2sub(a.dims, i)...] - end -end - -function getindex(a::PyArray, is::Integer...) - index = 1 - n = min(length(is),length(a.st)) - for i = 1:n - index += (is[i]-1)*a.st[i] - end - for i = n+1:length(is) - if is[i] != 1 - throw(BoundsError()) - end - end - unsafe_load(a.data, index) -end - -function writeok_assign(a::PyArray, v, i::Integer) - if a.info.readonly - throw(ArgumentError("read-only PyArray")) - else - unsafe_store!(a.data, v, i) - end - return a -end - -setindex!(a::PyArray{T,0}, v) where {T} = writeok_assign(a, v, 1) -setindex!(a::PyArray{T,1}, v, i::Integer) where {T} = writeok_assign(a, v, 1 + (i-1)*a.st[1]) - -setindex!(a::PyArray{T,2}, v, i::Integer, j::Integer) where {T} = - writeok_assign(a, v, 1 + (i-1)*a.st[1] + (j-1)*a.st[2]) - -function setindex!(a::PyArray, v, i::Integer) - if a.f_contig - return writeok_assign(a, v, i) - else - return setindex!(a, v, ind2sub(a.dims, i)...) - end -end - -function setindex!(a::PyArray, v, is::Integer...) - index = 1 - n = min(length(is),length(a.st)) - for i = 1:n - index += (is[i]-1)*a.st[i] - end - for i = n+1:length(is) - if is[i] != 1 - throw(BoundsError()) - end - end - writeok_assign(a, v, index) -end - -stride(a::PyArray, i::Integer) = a.st[i] - -Base.unsafe_convert(::Type{Ptr{T}}, a::PyArray{T}) where {T} = a.data - -pointer(a::PyArray, i::Int) = pointer(a, ind2sub(a.dims, i)) - -function pointer(a::PyArray{T}, is::Tuple{Vararg{Int}}) where T - offset = 0 - for i = 1:length(is) - offset += (is[i]-1)*a.st[i] - end - return a.data + offset*sizeof(T) -end - -summary(a::PyArray{T}) where {T} = string(Base.dims2string(size(a)), " ", - string(T), " PyArray") - -######################################################################### -# PyArray <-> PyObject conversions - -PyObject(a::PyArray) = a.o - -convert(::Type{PyArray}, o::PyObject) = PyArray(o) - -function convert(::Type{Array{T, 1}}, o::PyObject) where T<:NPY_TYPES - try - copy(PyArray{T, 1}(o, PyArray_Info(o))) # will check T and N vs. info - catch - len = @pycheckz ccall((@pysym :PySequence_Size), Int, (PyPtr,), o) - A = Array{pyany_toany(T)}(undef, len) - py2array(T, A, o, 1, 1) - end -end - -function convert(::Type{Array{T}}, o::PyObject) where T<:NPY_TYPES - try - info = PyArray_Info(o) - try - copy(PyArray{T, length(info.sz)}(o, info)) # will check T == eltype(info) - catch - return py2array(T, Array{pyany_toany(T)}(undef, info.sz...), o, 1, 1) - end - catch - py2array(T, o) - end -end - -function convert(::Type{Array{T,N}}, o::PyObject) where {T<:NPY_TYPES,N} - try - info = PyArray_Info(o) - try - copy(PyArray{T,N}(o, info)) # will check T,N == eltype(info),ndims(info) - catch - nd = length(info.sz) - if nd != N - throw(ArgumentError("cannot convert $(nd)d array to $(N)d")) - end - return py2array(T, Array{pyany_toany(T)}(undef, info.sz...), o, 1, 1) - end - catch - A = py2array(T, o) - if ndims(A) != N - throw(ArgumentError("cannot convert $(ndims(A))d array to $(N)d")) - end - A - end -end - -function convert(::Type{Array{PyObject}}, o::PyObject) - map(pyincref, convert(Array{PyPtr}, o)) -end - -function convert(::Type{Array{PyObject,1}}, o::PyObject) - map(pyincref, convert(Array{PyPtr, 1}, o)) -end - -function convert(::Type{Array{PyObject,N}}, o::PyObject) where N - map(pyincref, convert(Array{PyPtr, N}, o)) -end - -######################################################################### diff --git a/src/pyarray.jl b/src/pyarray.jl new file mode 100644 index 00000000..3b28d1f3 --- /dev/null +++ b/src/pyarray.jl @@ -0,0 +1,317 @@ +######################################################################### +# Extract shape and other information about arrays that support Python's +# Buffer Interface/Protocol (PEP 3118) +######################################################################### +struct PyArray_Info{T,N} + native::Bool # native byte order? + sz::NTuple{N,Int} + st::NTuple{N,Int} # strides, in multiples of bytes! + data::Ptr{T} + readonly::Bool +end + +function PyArray_Info(o::PyObject) + # n.b. the pydecref(::PyBuffer) finalizer handles releasing the PyBuffer + pybuf = PyBuffer(o, PyBUF_ND_CONTIGUOUS) + T, native_byteorder = array_format(pybuf) + sz = size(pybuf) + strd = strides(pybuf) + length(strd) == 0 && (sz = ()) + N = length(sz) + isreadonly = pybuf.buf.readonly==1 + return PyArray_Info{T,N}(native_byteorder, sz, strd, pybuf.buf.buf, isreadonly) +end + +aligned(i::PyArray_Info{T,N}) where {T,N} = # FIXME: also check pointer alignment? + all(m -> m == 0, mod.(i.st, sizeof(T))) # strides divisible by elsize + +eltype(i::PyArray_Info{T,N}) where {T,N} = T +ndims(i::PyArray_Info{T,N}) where {T,N} = N + +function default_stride(sz::NTuple{N, Int}, ::Type{T}) where {T,N} + stv = Vector{Int}(N) + stv[end] = sizeof(T) + for i = N-1:-1:1 + stv[i] = stv[i+1]*sz[i+1] + end + ntuple(i->stv[i], N) +end + +# whether a contiguous array in column-major (Fortran, Julia) order +function f_contiguous(T::Type, sz::NTuple{N,Int}, st::NTuple{N,Int}) where N + if prod(sz) == 1 || length(sz) == 1 + # 0 or 1-dim arrays should default to f-contiguous in julia + return true + end + if st[1] != sizeof(T) + return false + end + for j = 2:N + if st[j] != st[j-1] * sz[j-1] + return false + end + end + return true +end + +f_contiguous(T::Type, sz::NTuple{N1,Int}, st::NTuple{N2,Int}) where {N1,N2} = + error("stride and size are different lengths, size: $sz, strides: $sz") + +f_contiguous(i::PyArray_Info{T,N}) where {T,N} = f_contiguous(T, i.sz, i.st) +c_contiguous(i::PyArray_Info{T,N}) where {T,N} = f_contiguous(T, reverse(i.sz), reverse(i.st)) + +######################################################################### +# PyArray: no-copy wrapper around NumPy ndarray +# +# Hopefully, in the future this can be a subclass of StridedArray (see +# Julia issue #2345), which will allow it to be used with most Julia +# functions, but that is not possible at the moment. So, to use this +# with Julia linalg functions etcetera a copy is still required. + +""" + PyArray(o::PyObject) + +This converts an `ndarray` object `o` to a PyArray. + +This implements a nocopy wrapper to a NumPy array (currently of only numeric types only). + +If you are using `pycall` and the function returns an `ndarray`, you can use `PyArray` as the return type to directly receive a `PyArray`. +""" +mutable struct PyArray{T,N} <: AbstractArray{T,N} + o::PyObject + info::PyArray_Info + dims::Dims + st::NTuple{N,Int} + f_contig::Bool + c_contig::Bool + data::Ptr{T} + + function PyArray{T,N}(o::PyObject, info::PyArray_Info) where {T,N} + if !aligned(info) + throw(ArgumentError("only NPY_ARRAY_ALIGNED arrays are supported")) + elseif !info.native + throw(ArgumentError("only native byte-order arrays are supported")) + elseif eltype(info) != T + throw(ArgumentError("inconsistent type in PyArray constructor")) + elseif length(info.sz) != N || length(info.st) != N + throw(ArgumentError("inconsistent ndims in PyArray constructor")) + end + return new{T,N}(o, info, tuple(info.sz...), div.(info.st, sizeof(T)), + f_contiguous(info), c_contiguous(info), + convert(Ptr{T}, info.data)) + end +end + +function PyArray(o::PyObject) + info = PyArray_Info(o) + return PyArray{eltype(info), length(info.sz)}(o, info) +end + +size(a::PyArray) = a.dims +ndims(a::PyArray{T,N}) where {T,N} = N + +similar(a::PyArray, T, dims::Dims) = Array{T}(undef, dims) + +function setdata!{T,N}(a::PyArray{T,N}, o::PyObject, pybufinfo=PyBuffer()) + PyBuffer!(pybufinfo, o, PyBUF_ND_CONTIGUOUS) + dataptr = pybufinfo.buf.buf + a.data = reinterpret(Ptr{T}, dataptr) + a +end + +function copy(a::PyArray{T,N}) where {T,N} + if N > 1 && a.c_contig # equivalent to f_contig with reversed dims + B = unsafe_wrap(Array, a.data, ntuple((n -> a.dims[N - n + 1]), N)) + return permutedims(B, (N:-1:1)) + end + A = Array{T}(undef, a.dims) + if a.f_contig + ccall(:memcpy, Cvoid, (Ptr{T}, Ptr{T}, Int), A, a, sizeof(T)*length(a)) + return A + else + return copyto!(A, a) + end +end + +# TODO: need to do bounds-checking of these indices! + +getindex(a::PyArray{T,0}) where {T} = unsafe_load(a.data) +getindex(a::PyArray{T,1}, i::Integer) where {T} = unsafe_load(a.data, 1 + (i-1)*a.st[1]) + +getindex(a::PyArray{T,2}, i::Integer, j::Integer) where {T} = + unsafe_load(a.data, 1 + (i-1)*a.st[1] + (j-1)*a.st[2]) + +function getindex(a::PyArray, i::Integer) + if a.f_contig + return unsafe_load(a.data, i) + else + return a[ind2sub(a.dims, i)...] + end +end + +function getindex(a::PyArray, is::Integer...) + index = 1 + n = min(length(is),length(a.st)) + for i = 1:n + index += (is[i]-1)*a.st[i] + end + for i = n+1:length(is) + if is[i] != 1 + throw(BoundsError()) + end + end + unsafe_load(a.data, index) +end + +function writeok_assign(a::PyArray, v, i::Integer) + if a.info.readonly + throw(ArgumentError("read-only PyArray")) + else + unsafe_store!(a.data, v, i) + end + return a +end + +setindex!(a::PyArray{T,0}, v) where {T} = writeok_assign(a, v, 1) +setindex!(a::PyArray{T,1}, v, i::Integer) where {T} = writeok_assign(a, v, 1 + (i-1)*a.st[1]) + +setindex!(a::PyArray{T,2}, v, i::Integer, j::Integer) where {T} = + writeok_assign(a, v, 1 + (i-1)*a.st[1] + (j-1)*a.st[2]) + +function setindex!(a::PyArray, v, i::Integer) + if a.f_contig + return writeok_assign(a, v, i) + else + return setindex!(a, v, ind2sub(a.dims, i)...) + end +end + +function setindex!(a::PyArray, v, is::Integer...) + index = 1 + n = min(length(is),length(a.st)) + for i = 1:n + index += (is[i]-1)*a.st[i] + end + for i = n+1:length(is) + if is[i] != 1 + throw(BoundsError()) + end + end + writeok_assign(a, v, index) +end + +stride(a::PyArray, i::Integer) = a.st[i] + +Base.unsafe_convert(::Type{Ptr{T}}, a::PyArray{T}) where {T} = a.data + +pointer(a::PyArray, i::Int) = pointer(a, ind2sub(a.dims, i)) + +function pointer(a::PyArray{T}, is::Tuple{Vararg{Int}}) where T + offset = 0 + for i = 1:length(is) + offset += (is[i]-1)*a.st[i] + end + return a.data + offset*sizeof(T) +end + +summary(a::PyArray{T}) where {T} = string(Base.dims2string(size(a)), " ", + string(T), " PyArray") + +######################################################################### +# PyArray <-> PyObject conversions + +const PYARR_TYPES = Union{Bool,Int8,UInt8,Int16,UInt16,Int32,UInt32,Int64,UInt64,Float16,Float32,Float64,ComplexF32,ComplexF64,PyPtr} + +PyObject(a::PyArray) = a.o + +convert(::Type{PyArray}, o::PyObject) = PyArray(o) + +function convert(::Type{Array{T, 1}}, o::PyObject) where T<:PYARR_TYPES + try + copy(PyArray{T, 1}(o, PyArray_Info(o))) # will check T and N vs. info + catch + len = @pycheckz ccall((@pysym :PySequence_Size), Int, (PyPtr,), o) + A = Array{pyany_toany(T)}(undef, len) + py2array(T, A, o, 1, 1) + end +end + +function convert(::Type{Array{T}}, o::PyObject) where T<:PYARR_TYPES + try + info = PyArray_Info(o) + try + copy(PyArray{T, length(info.sz)}(o, info)) # will check T == eltype(info) + catch + return py2array(T, Array{pyany_toany(T)}(undef, info.sz...), o, 1, 1) + end + catch + py2array(T, o) + end +end + +function convert(::Type{Array{T,N}}, o::PyObject) where {T<:PYARR_TYPES,N} + try + info = PyArray_Info(o) + try + copy(PyArray{T,N}(o, info)) # will check T,N == eltype(info),ndims(info) + catch + nd = length(info.sz) + if nd != N + throw(ArgumentError("cannot convert $(nd)d array to $(N)d")) + end + return py2array(T, Array{pyany_toany(T)}(undef, info.sz...), o, 1, 1) + end + catch + A = py2array(T, o) + if ndims(A) != N + throw(ArgumentError("cannot convert $(ndims(A))d array to $(N)d")) + end + A + end +end + +function convert(::Type{Array{PyObject}}, o::PyObject) + map(pyincref, convert(Array{PyPtr}, o)) +end + +function convert(::Type{Array{PyObject,1}}, o::PyObject) + map(pyincref, convert(Array{PyPtr, 1}, o)) +end + +function convert(::Type{Array{PyObject,N}}, o::PyObject) where N + map(pyincref, convert(Array{PyPtr, N}, o)) +end + +array_format(o::PyObject) = array_format(PyBuffer(o, PyBUF_ND_CONTIGUOUS)) + +""" +``` +NoCopyArray(o::PyObject) +``` +Convert a Python array-like object, to a Julia `Array` without making a copy of +the data. If the data is stored in row-major format (the default in +Python/NumPy), then the returned array `nca` will be a `PermutedDimsArray` such +that the arrays are indexed the same way in Julia and Python. i.e. +`nca[idxs...] == o[idxs...]` + +Warning: This function is only lightly tested, and should be considered +experimental - it may cause segmentation faults on conversion or subsequent +array access, or be subtly broken in other ways. Only dense/contiguous, native +endian arrays that support the Python Buffer protocol are likely be converted +correctly. +""" +function NoCopyArray(o::PyObject) + # n.b. the pydecref(::PyBuffer) finalizer handles releasing the PyBuffer + pybuf = PyBuffer(o, PyBUF_ND_CONTIGUOUS) + T, native_byteorder = array_format(pybuf) + !native_byteorder && throw(ArgumentError( + "Only native endian format supported, format string: '$(get_format_str(pybuf))'")) + T == Void && throw(ArgumentError( + "Array datatype '$(get_format_str(pybuf))' not supported")) + # TODO more checks on strides etc + sz = size(pybuf) + arr = unsafe_wrap(Array, convert(Ptr{T}, pybuf.buf.buf), sz, false) + !f_contiguous(T, sz, strides(pybuf)) && + (arr = PermutedDimsArray(reshape(arr, reverse(sz)), (pybuf.buf.ndim:-1:1))) + return arr +end diff --git a/src/pybuffer.jl b/src/pybuffer.jl index f6beed92..a2d39661 100644 --- a/src/pybuffer.jl +++ b/src/pybuffer.jl @@ -244,37 +244,4 @@ function array_format(pybuf::PyBuffer) typestrs[fmt_str[type_start_idx:end]], native_byteorder end -array_format(o::PyObject) = array_format(PyBuffer(o, PyBUF_ND_CONTIGUOUS)) - -function PyArrayInfoFromBuffer(o::PyObject) - # n.b. the pydecref(::PyBuffer) finalizer handles releasing the PyBuffer - pybuf = PyBuffer(o, PyBUF_ND_CONTIGUOUS) - T, native_byteorder = array_format(pybuf) - sz = size(pybuf) - N = length(sz) - strd = strides(pybuf) - isreadonly = pybuf.buf.readonly==1 - return PyArray_Info{T,N}(native_byteorder, sz, strd, pybuf.buf.buf, isreadonly) -end - -function PyArrayFromBuffer(o::PyObject) - info = PyArrayInfoFromBuffer(o::PyObject) - PyArray{eltype(info), length(info.sz)}(o, info) -end - -function ArrayFromBuffer(o::PyObject) - # n.b. the pydecref(::PyBuffer) finalizer handles releasing the PyBuffer - pybuf = PyBuffer(o, PyBUF_ND_CONTIGUOUS) - T, native_byteorder = array_format(pybuf) - !native_byteorder && throw(ArgumentError( - "Only native endian format supported, format string: '$(get_format_str(pybuf))'")) - T == Void && throw(ArgumentError( - "Array datatype '$(get_format_str(pybuf))' not supported")) - # TODO more checks on strides etc - arr = unsafe_wrap(Array, convert(Ptr{T}, pybuf.buf.buf), size(pybuf), false) - !f_contiguous(T, size(pybuf), strides(pybuf)) && - (arr = PermutedDimsArray(arr, (pybuf.buf.ndim:-1:1))) - return arr -end - ############################################################################# diff --git a/test/testpybuffer.jl b/test/testpybuffer.jl index 0ad3696a..c2d588cc 100644 --- a/test/testpybuffer.jl +++ b/test/testpybuffer.jl @@ -6,12 +6,12 @@ pyutf8(s::String) = pyutf8(PyObject(s)) @testset "PyBuffer" begin np = pyimport("numpy") - # arrpyo = pyimport("array")["array"]=>PyObject - # listpyo = pybuiltin("list")=>PyObject - arrpy = pyimport("array")["array"] + nparr = np["array"] listpy = pybuiltin("list") - arrpyo(args...) = pycall(arrpy, PyObject, args...) + arrpyo(args...; kwargs...) = pycall(nparr, PyObject, args...; kwargs...) listpyo(args...) = pycall(listpy, PyObject, args...) + pytestarray(sz::Int...; order="C") = + pycall(arrpyo(1.0:prod(sz), "d")["reshape"], PyObject, sz, order=order) @testset "String Buffers" begin b = PyCall.PyBuffer(pyutf8("test string")) @@ -27,26 +27,64 @@ pyutf8(s::String) = pyutf8(PyObject(s)) pycall(np["ndarray"], PyObject, 2; buffer=UInt8[0,1,3,2], dtype=wrong_endian_str*"i2") # Not supported, so throws - @test_throws ArgumentError ArrayFromBuffer(wrong_endian_arr) - @test_throws ArgumentError PyArrayFromBuffer(wrong_endian_arr) + @test_throws ArgumentError NoCopyArray(wrong_endian_arr) + @test_throws ArgumentError PyArray(wrong_endian_arr) end - @testset "ArrayFromBuffer" begin - ao = arrpyo("d", 1.0:10.0) + @testset "NoCopyArray 1d" begin + ao = arrpyo(1.0:10.0, "d") pybuf = PyBuffer(ao, PyBUF_ND_CONTIGUOUS) T, native_byteorder = array_format(pybuf) @test T == Float64 @test native_byteorder == true @test size(pybuf) == (10,) - @test strides(pybuf) == (sizeof(T),) - @test !(ArrayFromBuffer(ao) isa PermutedDimsArray) - @test ArrayFromBuffer(ao) isa Array + @test strides(pybuf) == (1,) .* sizeof(T) + nca = NoCopyArray(ao) + @test !(nca isa PermutedDimsArray) + @test nca isa Array + @test nca[3] == ao[3] + @test nca[4] == ao[4] + end + + @testset "NoCopyArray 2d f-contig" begin + ao = arrpyo(reshape(1.0:12.0, (3,4)), "d", order="F") + pybuf = PyBuffer(ao, PyBUF_ND_CONTIGUOUS) + T, native_byteorder = array_format(pybuf) + @test T == Float64 + @test native_byteorder == true + @test size(pybuf) == (3,4) + @test strides(pybuf) == (1, 3) .* sizeof(T) + nca = NoCopyArray(ao) + @test !(nca isa PermutedDimsArray) + # @show typeof(nca) (nca isa Array) + @test nca isa Array + @test size(nca) == (3,4) + @test strides(nca) == (1,3) + @test nca[3,2] == ao[3,2] + @test nca[2,3] == ao[2,3] + end + + @testset "NoCopyArray 3d c-contig" begin + ao = pytestarray(3,4,5) + pybuf = PyBuffer(ao, PyBUF_ND_CONTIGUOUS) + T, native_byteorder = array_format(pybuf) + @test T == Float64 + @test native_byteorder == true + @test size(pybuf) == (3,4,5) + @test strides(pybuf) == (20,5,1) .* sizeof(T) + nca = NoCopyArray(ao) + @test nca isa PermutedDimsArray + @test !(nca isa Array) + @test size(nca) == (3,4,5) + @test strides(nca) == (20,5,1) + @test nca[2,3,4] == ao[2,3,4] + @test nca[3,2,4] == ao[3,2,4] end @testset "isbuftype" begin @test isbuftype(PyObject(0)) == false @test isbuftype(listpyo((1.0:10.0...))) == false - @test isbuftype(arrpyo("d", 1.0:10.0)) == true + @test isbuftype(arrpyo(1.0:10.0, "d")) == true @test isbuftype(PyObject([1:10...])) == true end end From 984a59f035c341ad3b9980cfa127673d88882e9b Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Tue, 7 Aug 2018 02:02:17 +0300 Subject: [PATCH 07/16] Void to Nothing/Cvoid --- src/pyarray.jl | 2 +- src/pybuffer.jl | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/pyarray.jl b/src/pyarray.jl index f2f55041..6dc7c614 100644 --- a/src/pyarray.jl +++ b/src/pyarray.jl @@ -312,7 +312,7 @@ function NoCopyArray(o::PyObject) T, native_byteorder = array_format(pybuf) !native_byteorder && throw(ArgumentError( "Only native endian format supported, format string: '$(get_format_str(pybuf))'")) - T == Void && throw(ArgumentError( + T == Nothing && throw(ArgumentError( "Array datatype '$(get_format_str(pybuf))' not supported")) # TODO more checks on strides etc sz = size(pybuf) diff --git a/src/pybuffer.jl b/src/pybuffer.jl index a2d39661..77dee118 100644 --- a/src/pybuffer.jl +++ b/src/pybuffer.jl @@ -184,26 +184,28 @@ end # ref: https://github.com/numpy/numpy/blob/v1.14.2/numpy/core/src/multiarray/buffer.c#L966 const standard_typestrs = Dict{String,DataType}( - "?"=>Bool, "P"=>Ptr{Void}, + "?"=>Bool, "P"=>Ptr{Cvoid}, "b"=>Int8, "B"=>UInt8, "h"=>Int16, "H"=>UInt16, "i"=>Int32, "I"=>UInt32, "l"=>Int32, "L"=>UInt32, "q"=>Int64, "Q"=>UInt64, "e"=>Float16, "f"=>Float32, - "d"=>Float64, "g"=>Void, # Float128? + "d"=>Float64, "g"=>Nothing, # Float128? + # `Nothing` indicates no equiv Julia type "Z8"=>ComplexF32, "Z16"=>ComplexF64, "Zf"=>ComplexF32, "Zd"=>ComplexF64) const native_typestrs = Dict{String,DataType}( - "?"=>Bool, "P"=>Ptr{Void}, + "?"=>Bool, "P"=>Ptr{Cvoid}, "b"=>Int8, "B"=>UInt8, "h"=>Cshort, "H"=>Cushort, "i"=>Cint, "I"=>Cuint, "l"=>Clong, "L"=>Culong, "q"=>Clonglong, "Q"=>Culonglong, "e"=>Float16, "f"=>Cfloat, - "d"=>Cdouble, "g"=>Void, # Float128? + "d"=>Cdouble, "g"=>Nothing, # Float128? + # `Nothing` indicates no equiv Julia type "Z8"=>ComplexF32, "Z16"=>ComplexF64, "Zf"=>ComplexF32, "Zd"=>ComplexF64) From 80fc1d9d0a6c1eba49251da92f5a678168161d3a Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Tue, 7 Aug 2018 02:05:48 +0300 Subject: [PATCH 08/16] 0.7 deprecations --- src/pyarray.jl | 8 ++++++-- test/testpybuffer.jl | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/pyarray.jl b/src/pyarray.jl index 6dc7c614..0202418d 100644 --- a/src/pyarray.jl +++ b/src/pyarray.jl @@ -118,7 +118,7 @@ ndims(a::PyArray{T,N}) where {T,N} = N similar(a::PyArray, T, dims::Dims) = Array{T}(undef, dims) -function setdata!{T,N}(a::PyArray{T,N}, o::PyObject, pybufinfo=PyBuffer()) +function setdata!(a::PyArray{T,N}, o::PyObject, pybufinfo=PyBuffer()) where {T,N} PyBuffer!(pybufinfo, o, PyBUF_ND_CONTIGUOUS) dataptr = pybufinfo.buf.buf a.data = reinterpret(Ptr{T}, dataptr) @@ -316,7 +316,11 @@ function NoCopyArray(o::PyObject) "Array datatype '$(get_format_str(pybuf))' not supported")) # TODO more checks on strides etc sz = size(pybuf) - arr = unsafe_wrap(Array, convert(Ptr{T}, pybuf.buf.buf), sz, false) + @static if VERSION >= v"0.7.0-DEV.3526" # julia#25647 + arr = unsafe_wrap(Array, convert(Ptr{T}, pybuf.buf.buf), sz, own=false) + else + arr = unsafe_wrap(Array, convert(Ptr{T}, pybuf.buf.buf), sz, false) + end !f_contiguous(T, sz, strides(pybuf)) && (arr = PermutedDimsArray(reshape(arr, reverse(sz)), (pybuf.buf.ndim:-1:1))) return arr diff --git a/test/testpybuffer.jl b/test/testpybuffer.jl index c2d588cc..6e7cddcc 100644 --- a/test/testpybuffer.jl +++ b/test/testpybuffer.jl @@ -83,7 +83,7 @@ pyutf8(s::String) = pyutf8(PyObject(s)) @testset "isbuftype" begin @test isbuftype(PyObject(0)) == false - @test isbuftype(listpyo((1.0:10.0...))) == false + @test isbuftype(listpyo((1.0:10.0...,))) == false @test isbuftype(arrpyo(1.0:10.0, "d")) == true @test isbuftype(PyObject([1:10...])) == true end From edcc3ca3d19451d4b64602c03406efe2194f6bbf Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Tue, 7 Aug 2018 02:16:32 +0300 Subject: [PATCH 09/16] put my thang down flip it and reverse it --- src/pyarray.jl | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/pyarray.jl b/src/pyarray.jl index 0202418d..e60867dd 100644 --- a/src/pyarray.jl +++ b/src/pyarray.jl @@ -59,11 +59,7 @@ f_contiguous(T::Type, sz::NTuple{N1,Int}, st::NTuple{N2,Int}) where {N1,N2} = f_contiguous(i::PyArray_Info{T,N}) where {T,N} = f_contiguous(T, i.sz, i.st) c_contiguous(i::PyArray_Info{T,N}) where {T,N} = - @static if VERSION >= v"0.7.0-DEV.4534" # julia#26369 - f_contiguous(T, reverse(i.sz), reverse(i.st)) - else - f_contiguous(T, flipdim(i.sz,1), flipdim(i.st,1)) - end + f_contiguous(T, reverse(i.sz), reverse(i.st)) ######################################################################### From e939d629db86c454ad45340fcd32db85ff3649b0 Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Thu, 13 Sep 2018 15:45:36 +1000 Subject: [PATCH 10/16] Improve lifecycle management of Pybuffers PyArray no holds a ref to the PyBuffer object. This should ensure that the buffer isn't released until the PyArray is no longer needed setdata! now explicitly releases the buffer it used to point to --- src/pyarray.jl | 10 ++++++++-- src/pybuffer.jl | 8 ++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/pyarray.jl b/src/pyarray.jl index e60867dd..c2689793 100644 --- a/src/pyarray.jl +++ b/src/pyarray.jl @@ -8,6 +8,7 @@ struct PyArray_Info{T,N} st::NTuple{N,Int} # strides, in multiples of bytes! data::Ptr{T} readonly::Bool + pybuf::PyBuffer end function PyArray_Info(o::PyObject) @@ -19,7 +20,7 @@ function PyArray_Info(o::PyObject) length(strd) == 0 && (sz = ()) N = length(sz) isreadonly = pybuf.buf.readonly==1 - return PyArray_Info{T,N}(native_byteorder, sz, strd, pybuf.buf.buf, isreadonly) + return PyArray_Info{T,N}(native_byteorder, sz, strd, pybuf.buf.buf, isreadonly, pybuf) end aligned(i::PyArray_Info{T,N}) where {T,N} = # FIXME: also check pointer alignment? @@ -114,7 +115,12 @@ ndims(a::PyArray{T,N}) where {T,N} = N similar(a::PyArray, T, dims::Dims) = Array{T}(undef, dims) -function setdata!(a::PyArray{T,N}, o::PyObject, pybufinfo=PyBuffer()) where {T,N} +""" +Update the data ptr of the `a` to point to the buffer exposed by `o` through +the Python buffer interface +""" +function setdata!(a::PyArray{T,N}, o::PyObject) where {T,N} + pybufinfo = a.info.pybuf PyBuffer!(pybufinfo, o, PyBUF_ND_CONTIGUOUS) dataptr = pybufinfo.buf.buf a.data = reinterpret(Ptr{T}, dataptr) diff --git a/src/pybuffer.jl b/src/pybuffer.jl index 77dee118..1a7536cc 100644 --- a/src/pybuffer.jl +++ b/src/pybuffer.jl @@ -37,6 +37,13 @@ mutable struct PyBuffer end end +""" +`pydecref(o::PyBuffer)` +Release the reference to buffer `o` +N.b. As per https://docs.python.org/3/c-api/buffer.html#c.PyBuffer_Release, +It is an error to call this function on a PyBuffer that was not obtained via +the python c-api function `PyObject_GetBuffer()` +""" function pydecref(o::PyBuffer) # note that PyBuffer_Release sets o.obj to NULL, and # is a no-op if o.obj is already NULL @@ -114,6 +121,7 @@ end function PyBuffer!(b::PyBuffer, o::Union{PyObject,PyPtr}, flags=PyBUF_SIMPLE) # TODO change to `Ref{PyBuffer}` when 0.6 is dropped. + pydecref(b) # ensure b is properly released @pycheckz ccall((@pysym :PyObject_GetBuffer), Cint, (PyPtr, Any, Cint), o, b, flags) return b From 3a24a0c5b902dbc4ba728bd27935b1ea54d78fe1 Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Thu, 13 Sep 2018 17:21:17 +1000 Subject: [PATCH 11/16] Avoid tests when NumPy not available, and fix a test --- test/testpybuffer.jl | 144 ++++++++++++++++++++++--------------------- 1 file changed, 74 insertions(+), 70 deletions(-) diff --git a/test/testpybuffer.jl b/test/testpybuffer.jl index 6e7cddcc..91937a72 100644 --- a/test/testpybuffer.jl +++ b/test/testpybuffer.jl @@ -1,18 +1,10 @@ using Compat.Test, PyCall, Compat -using PyCall: f_contiguous, PyBUF_ND_CONTIGUOUS, array_format +using PyCall: f_contiguous, PyBUF_ND_CONTIGUOUS, array_format, npy_initialized pyutf8(s::PyObject) = pycall(s["encode"], PyObject, "utf-8") pyutf8(s::String) = pyutf8(PyObject(s)) @testset "PyBuffer" begin - np = pyimport("numpy") - nparr = np["array"] - listpy = pybuiltin("list") - arrpyo(args...; kwargs...) = pycall(nparr, PyObject, args...; kwargs...) - listpyo(args...) = pycall(listpy, PyObject, args...) - pytestarray(sz::Int...; order="C") = - pycall(arrpyo(1.0:prod(sz), "d")["reshape"], PyObject, sz, order=order) - @testset "String Buffers" begin b = PyCall.PyBuffer(pyutf8("test string")) @test ndims(b) == 1 @@ -21,70 +13,82 @@ pyutf8(s::String) = pyutf8(PyObject(s)) @test PyCall.iscontiguous(b) == true end - @testset "Non-native-endian" begin - wrong_endian_str = ENDIAN_BOM == 0x01020304 ? "<" : ">" - wrong_endian_arr = - pycall(np["ndarray"], PyObject, 2; buffer=UInt8[0,1,3,2], - dtype=wrong_endian_str*"i2") - # Not supported, so throws - @test_throws ArgumentError NoCopyArray(wrong_endian_arr) - @test_throws ArgumentError PyArray(wrong_endian_arr) - end + if !npy_initialized + println("Skipping array related buffer tests since NumPy not available") + else + np = pyimport("numpy") + listpy = pybuiltin("list") + arrpyo(args...; kwargs...) = + pycall(np["array"], PyObject, args...; kwargs...) + listpyo(args...) = pycall(listpy, PyObject, args...) + pytestarray(sz::Int...; order="C") = + pycall(arrpyo(1.0:prod(sz), "d")["reshape"], PyObject, sz, order=order) - @testset "NoCopyArray 1d" begin - ao = arrpyo(1.0:10.0, "d") - pybuf = PyBuffer(ao, PyBUF_ND_CONTIGUOUS) - T, native_byteorder = array_format(pybuf) - @test T == Float64 - @test native_byteorder == true - @test size(pybuf) == (10,) - @test strides(pybuf) == (1,) .* sizeof(T) - nca = NoCopyArray(ao) - @test !(nca isa PermutedDimsArray) - @test nca isa Array - @test nca[3] == ao[3] - @test nca[4] == ao[4] - end + @testset "Non-native-endian" begin + wrong_endian_str = ENDIAN_BOM == 0x01020304 ? "<" : ">" + wrong_endian_arr = + pycall(np["ndarray"], PyObject, 2; buffer=UInt8[0,1,3,2], + dtype=wrong_endian_str*"i2") + # Not supported, so throws + @test_throws ArgumentError NoCopyArray(wrong_endian_arr) + @test_throws ArgumentError PyArray(wrong_endian_arr) + end - @testset "NoCopyArray 2d f-contig" begin - ao = arrpyo(reshape(1.0:12.0, (3,4)), "d", order="F") - pybuf = PyBuffer(ao, PyBUF_ND_CONTIGUOUS) - T, native_byteorder = array_format(pybuf) - @test T == Float64 - @test native_byteorder == true - @test size(pybuf) == (3,4) - @test strides(pybuf) == (1, 3) .* sizeof(T) - nca = NoCopyArray(ao) - @test !(nca isa PermutedDimsArray) - # @show typeof(nca) (nca isa Array) - @test nca isa Array - @test size(nca) == (3,4) - @test strides(nca) == (1,3) - @test nca[3,2] == ao[3,2] - @test nca[2,3] == ao[2,3] - end + @testset "NoCopyArray 1d" begin + ao = arrpyo(1.0:10.0, "d") + pybuf = PyBuffer(ao, PyBUF_ND_CONTIGUOUS) + T, native_byteorder = array_format(pybuf) + @test T == Float64 + @test native_byteorder == true + @test size(pybuf) == (10,) + @test strides(pybuf) == (1,) .* sizeof(T) + nca = NoCopyArray(ao) + @test !(nca isa PermutedDimsArray) + @test nca isa Array + @test nca[3] == ao[3] + @test nca[4] == ao[4] + end - @testset "NoCopyArray 3d c-contig" begin - ao = pytestarray(3,4,5) - pybuf = PyBuffer(ao, PyBUF_ND_CONTIGUOUS) - T, native_byteorder = array_format(pybuf) - @test T == Float64 - @test native_byteorder == true - @test size(pybuf) == (3,4,5) - @test strides(pybuf) == (20,5,1) .* sizeof(T) - nca = NoCopyArray(ao) - @test nca isa PermutedDimsArray - @test !(nca isa Array) - @test size(nca) == (3,4,5) - @test strides(nca) == (20,5,1) - @test nca[2,3,4] == ao[2,3,4] - @test nca[3,2,4] == ao[3,2,4] - end + @testset "NoCopyArray 2d f-contig" begin + ao = arrpyo(reshape(1.0:12.0, (3,4)) |> collect, "d", order="F") + pybuf = PyBuffer(ao, PyBUF_ND_CONTIGUOUS) + T, native_byteorder = array_format(pybuf) + @test T == Float64 + @test native_byteorder == true + @test size(pybuf) == (3,4) + @test strides(pybuf) == (1, 3) .* sizeof(T) + nca = NoCopyArray(ao) + @test !(nca isa PermutedDimsArray) + # @show typeof(nca) (nca isa Array) + @test nca isa Array + @test size(nca) == (3,4) + @test strides(nca) == (1,3) + @test nca[3,2] == ao[3,2] + @test nca[2,3] == ao[2,3] + end + + @testset "NoCopyArray 3d c-contig" begin + ao = pytestarray(3,4,5) + pybuf = PyBuffer(ao, PyBUF_ND_CONTIGUOUS) + T, native_byteorder = array_format(pybuf) + @test T == Float64 + @test native_byteorder == true + @test size(pybuf) == (3,4,5) + @test strides(pybuf) == (20,5,1) .* sizeof(T) + nca = NoCopyArray(ao) + @test nca isa PermutedDimsArray + @test !(nca isa Array) + @test size(nca) == (3,4,5) + @test strides(nca) == (20,5,1) + @test nca[2,3,4] == ao[2,3,4] + @test nca[3,2,4] == ao[3,2,4] + end - @testset "isbuftype" begin - @test isbuftype(PyObject(0)) == false - @test isbuftype(listpyo((1.0:10.0...,))) == false - @test isbuftype(arrpyo(1.0:10.0, "d")) == true - @test isbuftype(PyObject([1:10...])) == true + @testset "isbuftype" begin + @test isbuftype(PyObject(0)) == false + @test isbuftype(listpyo((1.0:10.0...,))) == false + @test isbuftype(arrpyo(1.0:10.0, "d")) == true + @test isbuftype(PyObject([1:10...])) == true + end end end From dbc9caab22ee4619d00b3f290b228066d702d0e0 Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Mon, 22 Oct 2018 20:04:58 +1100 Subject: [PATCH 12/16] Clarify pydecref(o::PyBuffer) docstring and change pass a PyPtr_NULL instead of a C_NULL for Py_buffer.obj when creating a new PyBuffer --- src/pybuffer.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pybuffer.jl b/src/pybuffer.jl index 1a7536cc..ea076e1f 100644 --- a/src/pybuffer.jl +++ b/src/pybuffer.jl @@ -29,7 +29,7 @@ end mutable struct PyBuffer buf::Py_buffer PyBuffer() = begin - b = new(Py_buffer(C_NULL, C_NULL, 0, 0, + b = new(Py_buffer(C_NULL, PyPtr_NULL, 0, 0, 0, 0, C_NULL, C_NULL, C_NULL, C_NULL, C_NULL, C_NULL, C_NULL)) @compat finalizer(pydecref, b) @@ -42,7 +42,7 @@ end Release the reference to buffer `o` N.b. As per https://docs.python.org/3/c-api/buffer.html#c.PyBuffer_Release, It is an error to call this function on a PyBuffer that was not obtained via -the python c-api function `PyObject_GetBuffer()` +the python c-api function `PyObject_GetBuffer(), unless o.obj is a PyPtr(C_NULL)` """ function pydecref(o::PyBuffer) # note that PyBuffer_Release sets o.obj to NULL, and From 181b785a2cb5f75ca4daee5bc8f27db10f552d81 Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Wed, 24 Oct 2018 19:29:53 +1100 Subject: [PATCH 13/16] Remove exports: setdata!, NoCopyArray, isbuftype --- src/PyCall.jl | 2 +- test/testpybuffer.jl | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/PyCall.jl b/src/PyCall.jl index cbc5b0a4..f96740ab 100644 --- a/src/PyCall.jl +++ b/src/PyCall.jl @@ -6,7 +6,7 @@ using Compat, VersionParsing export pycall, pycall!, pyimport, pyimport_e, pybuiltin, PyObject, PyReverseDims, PyPtr, pyincref, pydecref, pyversion, - PyArray, PyArray_Info, PyBuffer, setdata!, NoCopyArray, isbuftype, + PyArray, PyArray_Info, PyBuffer, pyerr_check, pyerr_clear, pytype_query, PyAny, @pyimport, PyDict, pyisinstance, pywrap, pytypeof, pyeval, PyVector, pystring, pystr, pyrepr, pyraise, pytype_mapping, pygui, pygui_start, pygui_stop, diff --git a/test/testpybuffer.jl b/test/testpybuffer.jl index 91937a72..f7cbc359 100644 --- a/test/testpybuffer.jl +++ b/test/testpybuffer.jl @@ -1,5 +1,6 @@ using Compat.Test, PyCall, Compat -using PyCall: f_contiguous, PyBUF_ND_CONTIGUOUS, array_format, npy_initialized +using PyCall: f_contiguous, PyBUF_ND_CONTIGUOUS, array_format, npy_initialized, +NoCopyArray, isbuftype, setdata! pyutf8(s::PyObject) = pycall(s["encode"], PyObject, "utf-8") pyutf8(s::String) = pyutf8(PyObject(s)) From 7304622554e215ca98148c3e96c1830c2042bf56 Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Wed, 24 Oct 2018 19:42:10 +1100 Subject: [PATCH 14/16] Test setdata!, improve NoCopyArray docstring --- src/pyarray.jl | 15 ++++++++++----- test/testpybuffer.jl | 9 +++++++++ 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/src/pyarray.jl b/src/pyarray.jl index c2689793..cafed69f 100644 --- a/src/pyarray.jl +++ b/src/pyarray.jl @@ -296,11 +296,16 @@ array_format(o::PyObject) = array_format(PyBuffer(o, PyBUF_ND_CONTIGUOUS)) ``` NoCopyArray(o::PyObject) ``` -Convert a Python array-like object, to a Julia `Array` without making a copy of -the data. If the data is stored in row-major format (the default in -Python/NumPy), then the returned array `nca` will be a `PermutedDimsArray` such -that the arrays are indexed the same way in Julia and Python. i.e. -`nca[idxs...] == o[idxs...]` +Convert a Python array-like object, to a Julia `Array` or `PermutedDimsArray` +without making a copy of the data. + +If the data is stored in row-major format +(the default in Python/NumPy), then the returned array `nca` will be a +`PermutedDimsArray` such that the arrays are indexed the same way in Julia and +Python. i.e. `nca[idxs...] == o[idxs...]` + +If the data is stored in column-major format then a regular Julia `Array` will +be returned. Warning: This function is only lightly tested, and should be considered experimental - it may cause segmentation faults on conversion or subsequent diff --git a/test/testpybuffer.jl b/test/testpybuffer.jl index f7cbc359..cf552767 100644 --- a/test/testpybuffer.jl +++ b/test/testpybuffer.jl @@ -91,5 +91,14 @@ pyutf8(s::String) = pyutf8(PyObject(s)) @test isbuftype(arrpyo(1.0:10.0, "d")) == true @test isbuftype(PyObject([1:10...])) == true end + + # TODO maybe move these to a test_pyarray.jl + @testset "setdata!" begin + ao1 = arrpyo(1.0:10.0, "d") + pyarr = convert(PyArray, ao1) + ao2 = arrpyo(11.0:20.0, "d") + setdata!(pyarr, ao2) + @test all(pyarr[1:10] .== 11.0:20.0) + end end end From 0ea15dfa443ad283959ea7ae9594e7e479708aa5 Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Wed, 24 Oct 2018 19:42:52 +1100 Subject: [PATCH 15/16] Add tests for PyArray getindex and fix similar for PyArray indexing into a PyArray was throwing a ambiguous method error for similar(::PyArray, T, dims::Dims) --- src/pyarray.jl | 2 +- test/testpybuffer.jl | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/pyarray.jl b/src/pyarray.jl index cafed69f..e31f274e 100644 --- a/src/pyarray.jl +++ b/src/pyarray.jl @@ -113,7 +113,7 @@ end size(a::PyArray) = a.dims ndims(a::PyArray{T,N}) where {T,N} = N -similar(a::PyArray, T, dims::Dims) = Array{T}(undef, dims) +similar(a::PyArray, ::Type{T}, dims::Dims) where {T} = Array{T}(undef, dims) """ Update the data ptr of the `a` to point to the buffer exposed by `o` through diff --git a/test/testpybuffer.jl b/test/testpybuffer.jl index cf552767..8d735a05 100644 --- a/test/testpybuffer.jl +++ b/test/testpybuffer.jl @@ -100,5 +100,15 @@ pyutf8(s::String) = pyutf8(PyObject(s)) setdata!(pyarr, ao2) @test all(pyarr[1:10] .== 11.0:20.0) end + + @testset "similar on PyArray PyVec getindex" begin + jlarr1 = [1:10;] + jlarr2 = hcat([1:10;], [1:10;]) + pyarr1 = pycall(np["array"], PyArray, jlarr1) + pyarr2 = pycall(np["array"], PyArray, jlarr2) + @test all(pyarr1[1:10] .== jlarr1[1:10]) + @test all(pyarr2[1:10, 2] .== jlarr2[1:10, 2]) + @test all(pyarr2[1:10, 1:2] .== jlarr2) + end end end From 99c519cf4102acd64a3a6fdd362a0a92cfb004ea Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Sun, 28 Oct 2018 16:00:20 +1100 Subject: [PATCH 16/16] Add GC rooting note --- src/pyarray.jl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/pyarray.jl b/src/pyarray.jl index e31f274e..fb8559a6 100644 --- a/src/pyarray.jl +++ b/src/pyarray.jl @@ -142,6 +142,10 @@ function copy(a::PyArray{T,N}) where {T,N} end # TODO: need to do bounds-checking of these indices! +# TODO: need to GC root these `a`s to guard against the PyArray getting gc'd, +# e.g. if it's a temporary in a function: +# `two_rands() = pycall(np.rand, PyArray, 10)[1:2]` + getindex(a::PyArray{T,0}) where {T} = unsafe_load(a.data) getindex(a::PyArray{T,1}, i::Integer) where {T} = unsafe_load(a.data, 1 + (i-1)*a.st[1])