array: add unchecked access via proxy object

This adds bounds-unchecked access to arrays through a `a.unchecked<Type,
Dimensions>()` method.  (For `array_t<T>`, the `Type` template parameter
is omitted).  The mutable version (which requires the array have the
`writeable` flag) is available as `a.mutable_unchecked<...>()`.

Specifying the Dimensions as a template parameter allows storage of an
std::array; having the strides and sizes stored that way (as opposed to
storing a copy of the array's strides/shape pointers) allows the
compiler to make significant optimizations of the shape() method that it
can't make with a pointer; testing with nested loops of the form:

    for (size_t i0 = 0; i0 < r.shape(0); i0++)
        for (size_t i1 = 0; i1 < r.shape(1); i1++)
            ...
                r(i0, i1, ...) += 1;

over a 10 million element array gives around a 25% speedup (versus using
a pointer) for the 1D case, 33% for 2D, and runs more than twice as fast
with a 5D array.
This commit is contained in:
Jason Rhinelander
2017-03-19 01:14:23 -03:00
parent 0d765f4a7c
commit 423a49b8be
4 changed files with 217 additions and 7 deletions

View File

@@ -184,4 +184,36 @@ test_initializer numpy_array([](py::module &m) {
sm.def("issue685", [](std::string) { return "string"; });
sm.def("issue685", [](py::array) { return "array"; });
sm.def("issue685", [](py::object) { return "other"; });
sm.def("proxy_add2", [](py::array_t<double> a, double v) {
auto r = a.mutable_unchecked<2>();
for (size_t i = 0; i < r.shape(0); i++)
for (size_t j = 0; j < r.shape(1); j++)
r(i, j) += v;
}, py::arg().noconvert(), py::arg());
sm.def("proxy_init3", [](double start) {
py::array_t<double, py::array::c_style> a({ 3, 3, 3 });
auto r = a.mutable_unchecked<3>();
for (size_t i = 0; i < r.shape(0); i++)
for (size_t j = 0; j < r.shape(1); j++)
for (size_t k = 0; k < r.shape(2); k++)
r(i, j, k) = start++;
return a;
});
sm.def("proxy_init3F", [](double start) {
py::array_t<double, py::array::f_style> a({ 3, 3, 3 });
auto r = a.mutable_unchecked<3>();
for (size_t k = 0; k < r.shape(2); k++)
for (size_t j = 0; j < r.shape(1); j++)
for (size_t i = 0; i < r.shape(0); i++)
r(i, j, k) = start++;
return a;
});
sm.def("proxy_squared_L2_norm", [](py::array_t<double> a) {
auto r = a.unchecked<1>();
double sumsq = 0;
for (size_t i = 0; i < r.shape(0); i++)
sumsq += r[i] * r(i); // Either notation works for a 1D array
return sumsq;
});
});