/// line 248 the index
/// @file    transform_buffer.h
/// @brief   Buffer with separated even and odd columns and related algorithms.
/// @author  Martin Jirman (207962@mail.muni.cz)
/// @date    2011-01-20 18:33
///
///
/// Copyright (c) 2011 Martin Jirman
/// All rights reserved.
///
/// Redistribution and use in source and binary forms, with or without
/// modification, are permitted provided that the following conditions are met:
///
///     * Redistributions of source code must retain the above copyright
///       notice, this list of conditions and the following disclaimer.
///     * Redistributions in binary form must reproduce the above copyright
///       notice, this list of conditions and the following disclaimer in the
///       documentation and/or other materials provided with the distribution.
///
/// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
/// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
/// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
/// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
/// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
/// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
/// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
/// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
/// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
/// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
/// POSSIBILITY OF SUCH DAMAGE.
///

#ifndef TRANSFORM_BUFFER_H
#define TRANSFORM_BUFFER_H

namespace dwt_cuda {

/// Buffer (in shared memory of GPU) where block of input image is stored,
/// but odd and even columns are separated. (Generates less bank conflicts
/// when using lifting schema.) All operations expect SIZE_X threads.
/// Also implements basic building blocks of lifting schema.
///
/// Storage layout: one array holding the even-column buffer first, then
/// PADDING unused items, then the odd-column buffer (starting at ODD_OFFSET).
/// Inside each buffer, vertically adjacent items are VERTICAL_STRIDE apart.
///
/// @tparam T           type of the stored items
/// @tparam SIZE_X      width of the buffer excluding two boundaries (Also
///                     a number of threads participating on all operations.)
///                     Must be divisible by 4.
/// @tparam SIZE_Y      height of buffer (total number of lines)
/// @tparam BOUNDARY_X  number of extra pixels at the left and right side
///                     boundary is expected to be smaller than half SIZE_X
///                     Must be divisible by 2.
template <typename T, int SIZE_X, int SIZE_Y, int BOUNDARY_X>
class TransformBuffer {
public:
  enum {
    /// difference between pointers to two vertical neighbors
    VERTICAL_STRIDE = BOUNDARY_X + (SIZE_X / 2)
  };

private:
  enum {
    /// number of shared memory banks - needed for correct padding
    /// NOTE: the host compilation pass (no __CUDA_ARCH__) sees 16 here, so
    /// PADDING, ODD_OFFSET and sizeof(TransformBuffer) evaluated in host code
    /// may differ from the values seen by device code.
#ifdef __CUDA_ARCH__
    SHM_BANKS = ((__CUDA_ARCH__ >= 200) ? 32 : 16),
#else
    SHM_BANKS = 16, // for host code only - can be anything, won't be used
#endif

    /// size of one of two buffers (odd or even)
    BUFFER_SIZE = VERTICAL_STRIDE * SIZE_Y,

    /// unused space between two buffers; chosen so that ODD_OFFSET is
    /// congruent to SHM_BANKS / 2 modulo SHM_BANKS
    PADDING = SHM_BANKS - ((BUFFER_SIZE + SHM_BANKS / 2) % SHM_BANKS),

    /// offset of the odd columns buffer from the beginning of data buffer
    ODD_OFFSET = BUFFER_SIZE + PADDING,
  };

  /// buffer for both even and odd columns
  T data[2 * BUFFER_SIZE + PADDING];

  /// Applies specified function to all central elements while also passing
  /// previous and next elements as parameters. Elements are distributed
  /// among threads by threadIdx.x: each thread handles element threadIdx.x
  /// of every SIZE_X-sized chunk of the range.
  /// @param count       count of central elements to apply function to
  /// @param prevOffset  offset of first central element's predecessor
  /// @param midOffset   offset of first central element
  /// @param nextOffset  offset of first central element's successor
  /// @param function    the function itself; called as
  ///                    function(previous, center, next), where center is
  ///                    a mutable reference into the buffer
  template <typename FUNC>
  __device__ void horizontalStep(const int count, const int prevOffset,
                                 const int midOffset, const int nextOffset,
                                 const FUNC &function) {
    // number of unchecked iterations
    const int STEPS = count / SIZE_X;

    // items remaining after last unchecked iteration
    const int finalCount = count % SIZE_X;

    // offset of items processed in last (checked) iteration
    const int finalOffset = count - finalCount;

    // all threads perform fixed number of iterations ...
    for (int i = 0; i < STEPS; i++) {
      const int item = i * SIZE_X + threadIdx.x;
      const T previous = data[prevOffset + item];
      const T next = data[nextOffset + item];
      // neighbors are copied, the central element is modified in place
      T &center = data[midOffset + item];
      function(previous, center, next);
    }

    // ... but not all threads participate on final iteration
    if (threadIdx.x < finalCount) {
      const int item = finalOffset + threadIdx.x;
      const T previous = data[prevOffset + item];
      const T next = data[nextOffset + item];
      T &center = data[midOffset + item];
      function(previous, center, next);
    }
  }

public:
  /// Debugging aid: prints index and value of every item of the internal
  /// array (both buffers and the padding between them). Every thread that
  /// calls this prints the whole array.
  __device__ void getPrintData() {
    for (int i = 0; i < 2 * BUFFER_SIZE + PADDING; i++) {
      // "%f" expects a double: convert explicitly so that non floating
      // point item types (e.g. int) are printed with defined behavior
      printf(" index: %d data: %f \n ", i, static_cast<double>(data[i]));
    }
  }

  /// Gets offset of the column with given index. Central columns have
  /// indices from 0 to NUM_LINES - 1, left boundary columns have negative
  /// indices and right boundary columns indices start with NUM_LINES.
  /// @param columnIndex  index of column to get pointer to
  /// @return  offset of the first item of column with specified index
  __device__ int getColumnOffset(int columnIndex) {
    columnIndex += BOUNDARY_X;               // skip boundary
    return columnIndex / 2                   // select right column
           + (columnIndex & 1) * ODD_OFFSET; // select odd or even buffer
  }

  /// Provides access to data of the transform buffer.
  /// @param index  index of the item to work with (not range checked)
  /// @return reference to item at given index
  __device__ T &operator[](const int index) { return data[index]; }

  /// Applies specified function to all horizontally even elements in
  /// specified lines. (Including even elements in boundaries except
  /// first even element in first left boundary.) SIZE_X threads participate
  /// and synchronization is needed before result can be used.
  /// @param firstLine  index of first line
  /// @param numLines   count of lines
  /// @param func       function to be applied on all even elements
  ///                   parameters: previous (odd) element, the even
  ///                   element itself and finally next (odd) element
  template <typename FUNC>
  __device__ void forEachHorizontalEven(const int firstLine,
                                        const int numLines,
                                        const FUNC &func) {
    // number of even elements to apply function to
    const int count = numLines * VERTICAL_STRIDE - 1;
    // offset of first even element (the very first one is skipped)
    const int centerOffset = firstLine * VERTICAL_STRIDE + 1;
    // offset of odd predecessor of first even element
    const int prevOffset = firstLine * VERTICAL_STRIDE + ODD_OFFSET;
    // offset of odd successor of first even element
    const int nextOffset = prevOffset + 1;

    // call generic horizontal step function
    horizontalStep(count, prevOffset, centerOffset, nextOffset, func);
  }

  /// Applies given function to all horizontally odd elements in specified
  /// lines. (Including odd elements in boundaries except last odd element
  /// in last right boundary.) SIZE_X threads participate and synchronization
  /// is needed before result can be used.
  /// @param firstLine  index of first line
  /// @param numLines   count of lines
  /// @param func       function to be applied on all odd elements
  ///                   parameters: previous (even) element, the odd
  ///                   element itself and finally next (even) element
  template <typename FUNC>
  __device__ void forEachHorizontalOdd(const int firstLine,
                                       const int numLines,
                                       const FUNC &func) {
    // number of odd elements to apply function to
    const int count = numLines * VERTICAL_STRIDE - 1;
    // offset of even predecessor of first odd element
    const int prevOffset = firstLine * VERTICAL_STRIDE;
    // offset of first odd element
    const int centerOffset = prevOffset + ODD_OFFSET;
    // offset of even successor of first odd element
    const int nextOffset = prevOffset + 1;

    // call generic horizontal step function
    horizontalStep(count, prevOffset, centerOffset, nextOffset, func);
  }

  /// Applies specified function to all even elements (except element #0)
  /// of given column. Each thread takes care of one column, so there's
  /// no need for synchronization.
  /// @param columnOffset  offset of thread's column
  /// @param f             function to be applied on all even elements
  ///                      parameters: previous (odd) element, the even
  ///                      element itself and finally next (odd) element
  template <typename F>
  __device__ void forEachVerticalEven(const int columnOffset, const F &f) {
    if (SIZE_Y > 3) { // makes no sense otherwise
      const int steps = SIZE_Y / 2 - 1;
      for (int i = 0; i < steps; i++) {
        // visits rows 2, 4, 6, ...
        const int row = 2 + i * 2;
        const T prev = data[columnOffset + (row - 1) * VERTICAL_STRIDE];
        const T next = data[columnOffset + (row + 1) * VERTICAL_STRIDE];
        f(prev, data[columnOffset + row * VERTICAL_STRIDE], next);
      }
    }
  }

  /// Applies specified function to all odd elements of given column.
  /// Each thread takes care of one column, so there's no need for
  /// synchronization.
  /// @param columnOffset  offset of thread's column
  /// @param f             function to be applied on all odd elements
  ///                      parameters: previous (even) element, the odd
  ///                      element itself and finally next (even) element
  template <typename F>
  __device__ void forEachVerticalOdd(const int columnOffset, const F &f) {
    const int steps = (SIZE_Y - 1) / 2;
    for (int i = 0; i < steps; i++) {
      // visits rows 1, 3, 5, ...
      const int row = i * 2 + 1;
      const T prev = data[columnOffset + (row - 1) * VERTICAL_STRIDE];
      const T next = data[columnOffset + (row + 1) * VERTICAL_STRIDE];
      f(prev, data[columnOffset + row * VERTICAL_STRIDE], next);
    }
  }

  /// Scales elements at specified lines. Items are distributed among
  /// threads by threadIdx.x; each thread scales the item at the same
  /// position in both the even and the odd buffer.
  /// @param evenScale  scaling factor for horizontally even elements
  /// @param oddScale   scaling factor for horizontally odd elements
  /// @param firstLine  index of first line to scale elements in
  /// @param numLines   number of lines, whose elements should be scaled
  __device__ void scaleHorizontal(const T evenScale, const T oddScale,
                                  const int firstLine, const int numLines) {
    const int offset = firstLine * VERTICAL_STRIDE;
    const int count = numLines * VERTICAL_STRIDE;
    const int steps = count / SIZE_X;
    const int finalCount = count % SIZE_X;
    const int finalOffset = count - finalCount;

    // run iterations, where all threads participate
    for (int i = 0; i < steps; i++) {
      data[threadIdx.x + i * SIZE_X + offset] *= evenScale;
      data[threadIdx.x + i * SIZE_X + offset + ODD_OFFSET] *= oddScale;
    }

    // some threads also finish remaining unscaled items
    if (threadIdx.x < finalCount) {
      data[threadIdx.x + finalOffset + offset] *= evenScale;
      data[threadIdx.x + finalOffset + offset + ODD_OFFSET] *= oddScale;
    }
  }

  /// Scales elements in specified column.
  /// @param evenScale     scaling factor for vertically even elements
  /// @param oddScale      scaling factor for vertically odd elements
  /// @param columnOffset  offset of the column to work with
  /// @param numLines      number of lines, whose elements should be scaled
  /// @param firstLine     index of first line to scale elements in
  __device__ void scaleVertical(const T evenScale, const T oddScale,
                                const int columnOffset, const int numLines,
                                const int firstLine) {
    for (int i = firstLine; i < (numLines + firstLine); i++) {
      if (i & 1) {
        data[columnOffset + i * VERTICAL_STRIDE] *= oddScale;
      } else {
        data[columnOffset + i * VERTICAL_STRIDE] *= evenScale;
      }
    }
  }

  //****************For Test(Feb23), test inter parameters*************
  // Accessors exposing the compile-time layout constants to test code.
  __device__ int getVERTICAL_STRIDE() { return VERTICAL_STRIDE; }
  __device__ int getSHM_BANKS() { return SHM_BANKS; }
  __device__ int getBuffersize() { return BUFFER_SIZE; }
  __device__ int getPADDING() { return PADDING; }
  __device__ int getODD_OFFSET() { return ODD_OFFSET; }
  //****************For Test(Feb23), test inter parameters*************

}; // end of class TransformBuffer

} // namespace dwt_cuda

#endif // TRANSFORM_BUFFER_H