#include <Dataset.h>
Public Types | |
enum | Endianness { NATIVE = 0, LITTLE, BIG } |
Public Member Functions | |
Dataset (Group &parent, const std::string &name) | |
virtual | ~Dataset () |
Dataset< T > & | create (const std::vector< ssize_t > &dims, const std::vector< ssize_t > &maxdims=std::vector< ssize_t >(0), const std::string &filename="", enum Endianness endianness=NATIVE) |
Dataset< T > & | create1D (ssize_t len, ssize_t maxlen, const std::string &filename="", enum Endianness endianness=NATIVE) |
size_t | ndims () |
std::vector< ssize_t > | dims () |
ssize_t | dims1D () |
std::vector< ssize_t > | maxdims () |
ssize_t | maxdims1D () |
void | resize (const std::vector< ssize_t > &newdims) |
void | resize1D (ssize_t newlen) |
std::vector< std::string > | externalFiles () |
void | getMatrix (const std::vector< size_t > &pos, T *buffer, const std::vector< size_t > &size) |
void | setMatrix (const std::vector< size_t > &pos, const T *buffer, const std::vector< size_t > &size) |
void | get1D (size_t pos, T *outbuffer, size_t len, unsigned dimIndex=0) |
void | set1D (size_t pos, const T *inbuffer, size_t len, unsigned dimIndex=0) |
void | get2D (const std::vector< size_t > &pos, T *outbuffer2, size_t dim1, size_t dim2, unsigned dim1index=0, unsigned dim2index=1) |
void | set2D (const std::vector< size_t > &pos, const T *inbuffer2, size_t dim1, size_t dim2, unsigned dim1index=0, unsigned dim2index=1) |
void | get3D (const std::vector< size_t > &pos, T *outbuffer3, size_t dim1, size_t dim2, size_t dim3, unsigned dim1index=0, unsigned dim2index=1, unsigned dim3index=2) |
void | set3D (const std::vector< size_t > &pos, const T *inbuffer3, size_t dim1, size_t dim2, size_t dim3, unsigned dim1index=0, unsigned dim2index=1, unsigned dim3index=2) |
T | getScalar (const std::vector< size_t > &pos) |
T | getScalar1D (size_t pos) |
void | setScalar (const std::vector< size_t > &pos, const T &value) |
void | setScalar1D (size_t pos, T value) |
Protected Member Functions | |
bool | bigEndian (enum Endianness endianness) const |
void | matrixIO (const std::vector< size_t > &pos, T *buffer, const std::vector< size_t > &size, const std::vector< size_t > &strides, bool read) |
If the strides vector is empty, a contiguous array is assumed. | |
virtual Dataset< T > & | create () |
Provides generic functionality for HDF5 Datasets.
Python example:
# Create a new HDF5 file called "example.h5"
>>> f = File("example.h5", File.CREATE)

# Create a 2 x 3 dataset of floats within the given file
>>> d = DatasetFloat(f, "EXAMPLE_DATASET")
>>> d.create([2,3])

# Retrieve some information about the dataset
>>> d.dims()
(2, 3)
>>> d.ndims()
2

# Query the numpy format in which the data is stored
>>> d.dtype
<type 'numpy.float32'>

# Allocate a 2 x 3 numpy array of the right type to read data into
>>> import numpy
>>> x = numpy.zeros((2,3), dtype=d.dtype)

# Read data starting from [0,0], and display it
>>> d.get2D([0,0], x)
>>> x
array([[ 0., 0., 0.],
       [ 0., 0., 0.]], dtype=float32)

# Now, write some data: we write a block of size (1,2) to position (1,1)
>>> y = numpy.ones((1,2), dtype=d.dtype)
>>> d.set2D([1,1], y)

# Read data starting from [0,0], and display it
>>> d.get2D([0,0], x)
>>> x
array([[ 0., 0., 0.],
       [ 0., 1., 1.]], dtype=float32)

# Clean up
>>> import os
>>> os.remove("example.h5")
virtual dal::Dataset< T >::~Dataset | ( | ) | [inline, virtual] |
Destruct a Dataset object.
bool dal::Dataset< T >::bigEndian | ( | enum Endianness | endianness | ) | const [protected] |
Returns true if endianness is BIG or if it is NATIVE and the current machine architecture is big endian. Returns false if endianness is LITTLE or if it is NATIVE and the current machine architecture is little endian.
Dataset<T>& dal::Dataset< T >::create | ( | const std::vector< ssize_t > & | dims, |
const std::vector< ssize_t > & | maxdims = std::vector< ssize_t >(0) , |
||
const std::string & | filename = "" , |
||
enum Endianness | endianness = NATIVE |
||
) |
Creates a new dataset with dimensions sized `dims'. If `maxdims' is set, the dataset can be scaled up to `maxdims'. The rank of the dataset is dims.size() == maxdims.size(). A maximum of -1 represents an unbounded dimension.
If a `filename' is given, that file will be used to store the data. The file can be provided by the user, or will be created upon the first write. Note that the filename cannot be changed after the dataset has been created (HDF5 1.8), so providing an absolute path will make the dataset difficult to copy or move across systems. We strongly advise against absolute paths (and "../") here! Note that HDF5 1.8 has a problem accessing external files outside the current working directory. DAL works around this, but see Known Issue 1 on how the current working directory affects this.
If `filename' equals "", then dims == maxdims is required due to limitations of HDF5.
`endianness` toggles the byte order of each stored data value. Typically:
virtual Dataset<T>& dal::Dataset< T >::create | ( | ) | [inline, protected, virtual] |
Do not use this create function. It always throws to catch incorrect calls to create() in Group. To create a Dataset, use the Dataset::create function with arguments.
Reimplemented from dal::Group.
Dataset<T>& dal::Dataset< T >::create1D | ( | ssize_t | len, |
ssize_t | maxlen, | ||
const std::string & | filename = "" , |
||
enum Endianness | endianness = NATIVE |
||
) |
Create a new 1D dataset. See Dataset::create(...).
std::vector<ssize_t> dal::Dataset< T >::dims | ( | ) |
Returns the dimension sizes.
ssize_t dal::Dataset< T >::dims1D | ( | ) |
Returns the length of the 1D dataset.
std::vector<std::string> dal::Dataset< T >::externalFiles | ( | ) |
Returns a list of the external files containing data for this dataset.
void dal::Dataset< T >::get1D | ( | size_t | pos, |
T * | outbuffer, | ||
size_t | len, | ||
unsigned | dimIndex = 0 |
||
) |
Retrieves `len` data values from a dataset starting at index `pos`. `outbuffer` must point to a memory block large enough to hold `len` data values. If the underlying dataset is multi-dimensional, use `dimIndex` to indicate the dimension to retrieve from.
[in] | pos | index of the first data value |
[out] | outbuffer | 1D destination array |
[in] | len | number of data values to retrieve |
[in] | dimIndex | index of the dimension to query |
Requires:
void dal::Dataset< T >::get2D | ( | const std::vector< size_t > & | pos, |
T * | outbuffer2, | ||
size_t | dim1, | ||
size_t | dim2, | ||
unsigned | dim1index = 0 , |
||
unsigned | dim2index = 1 |
||
) |
Retrieves a 2D matrix of data from a 2D dataset from position `pos`. `outbuffer2` must point to a memory block large enough to hold the result.
[in] | pos | position of the first sample |
[out] | outbuffer2 | 2D destination array |
[in] | dim1 | size of first dimension of outbuffer2; determines the number of data values to retrieve |
[in] | dim2 | size of second dimension of outbuffer2; determines the number of data values to retrieve |
[in] | dim1index | index of the first dimension to query |
[in] | dim2index | index of the second dimension to query |
Requires:
void dal::Dataset< T >::get3D | ( | const std::vector< size_t > & | pos, |
T * | outbuffer3, | ||
size_t | dim1, | ||
size_t | dim2, | ||
size_t | dim3, | ||
unsigned | dim1index = 0 , |
||
unsigned | dim2index = 1 , |
||
unsigned | dim3index = 2 |
||
) |
Retrieves a 3D matrix of data from a 3D dataset from position `pos`. `outbuffer3` must point to a memory block large enough to hold the result.
[in] | pos | position of the first sample |
[out] | outbuffer3 | 3D destination array |
[in] | dim1 | size of first dimension of outbuffer3; determines the number of data values to retrieve |
[in] | dim2 | size of second dimension of outbuffer3; determines the number of data values to retrieve |
[in] | dim3 | size of third dimension of outbuffer3; determines the number of data values to retrieve |
[in] | dim1index | index of the first dimension to query |
[in] | dim2index | index of the second dimension to query |
[in] | dim3index | index of the third dimension to query |
Requires:
void dal::Dataset< T >::getMatrix | ( | const std::vector< size_t > & | pos, |
T * | buffer, | ||
const std::vector< size_t > & | size | ||
) |
Retrieves any matrix of data of sizes `size` from position `pos`. `buffer` must point to a memory block large enough to hold the result.
Requires: pos.size() == size.size() == ndims()
T dal::Dataset< T >::getScalar | ( | const std::vector< size_t > & | pos | ) |
Retrieves a single value from the dataset at position `pos`.
Requires: pos.size() == ndims()
T dal::Dataset< T >::getScalar1D | ( | size_t | pos | ) |
See Dataset::getScalar().
std::vector<ssize_t> dal::Dataset< T >::maxdims | ( | ) |
Returns the maximum dimension sizes to which this dataset can grow; elements of -1 represent unbounded dimensions.
ssize_t dal::Dataset< T >::maxdims1D | ( | ) |
Returns the maximum length to which this dataset can grow; -1 represents unbounded length.
size_t dal::Dataset< T >::ndims | ( | ) |
Returns the rank of the dataset.
void dal::Dataset< T >::resize | ( | const std::vector< ssize_t > & | newdims | ) |
Changes the dimension sizes of the dataset to `newdims`. Elements of -1 represent unbounded dimensions. If this dataset uses internal storage (i.e. externalFiles() is empty), dimensions cannot be unbounded due to limitations of HDF5.
For now, resizing is only supported if external files are used.
void dal::Dataset< T >::resize1D | ( | ssize_t | newlen | ) |
See Dataset::resize().
void dal::Dataset< T >::set1D | ( | size_t | pos, |
const T * | inbuffer, | ||
size_t | len, | ||
unsigned | dimIndex = 0 |
||
) |
Stores `len` data values into the dataset starting at index `pos`. `inbuffer` must contain at least `len` data values. If the underlying dataset is multi-dimensional, use `dimIndex` to indicate the dimension to store to.
[in] | pos | index of the first data value |
[in] | inbuffer | 1D source array |
[in] | len | number of data values to store |
[in] | dimIndex | index of the dimension to store to |
Requires:
void dal::Dataset< T >::set2D | ( | const std::vector< size_t > & | pos, |
const T * | inbuffer2, | ||
size_t | dim1, | ||
size_t | dim2, | ||
unsigned | dim1index = 0 , |
||
unsigned | dim2index = 1 |
||
) |
Stores a 2D matrix of data into a 2D dataset at position `pos`.
[in] | pos | position of the first sample |
[in] | inbuffer2 | 2D source array |
[in] | dim1 | size of first dimension of inbuffer2; determines the number of data values to store |
[in] | dim2 | size of second dimension of inbuffer2; determines the number of data values to store |
[in] | dim1index | index of the first dimension to query |
[in] | dim2index | index of the second dimension to query |
Requires:
void dal::Dataset< T >::set3D | ( | const std::vector< size_t > & | pos, |
const T * | inbuffer3, | ||
size_t | dim1, | ||
size_t | dim2, | ||
size_t | dim3, | ||
unsigned | dim1index = 0 , |
||
unsigned | dim2index = 1 , |
||
unsigned | dim3index = 2 |
||
) |
Stores a 3D matrix of data into a 3D dataset at position `pos`.
[in] | pos | position of the first sample |
[in] | inbuffer3 | 3D source array |
[in] | dim1 | size of first dimension of inbuffer3; determines the number of data values to store |
[in] | dim2 | size of second dimension of inbuffer3; determines the number of data values to store |
[in] | dim3 | size of third dimension of inbuffer3; determines the number of data values to store |
[in] | dim1index | index of the first dimension to query |
[in] | dim2index | index of the second dimension to query |
[in] | dim3index | index of the third dimension to query |
Requires:
void dal::Dataset< T >::setMatrix | ( | const std::vector< size_t > & | pos, |
const T * | buffer, | ||
const std::vector< size_t > & | size | ||
) |
Stores any matrix of data of sizes `size` at position `pos`.
Requires: pos.size() == size.size() == ndims()
void dal::Dataset< T >::setScalar | ( | const std::vector< size_t > & | pos, |
const T & | value | ||
) |
Stores a single value into the dataset at position `pos`.
Requires: pos.size() == ndims()
void dal::Dataset< T >::setScalar1D | ( | size_t | pos, |
T | value | ||
) |
See Dataset::setScalar().