386 lines
13 KiB
Plaintext
Executable File
386 lines
13 KiB
Plaintext
Executable File
/*
|
|
* Copyright (c) 2009, Jiri Matela
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are met:
|
|
*
|
|
* * Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* * Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include <assert.h>
#include <errno.h>
#include <error.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <unistd.h>

#include <fstream>
#include <iostream>
#include <string>

#include "dwt_cuda/dwt.h"
#include "dwt_cuda/common.h"
#include "dwt.h"
#include "common.h"
|
|
|
|
/**
 * Forward DWT entry point for float data.
 *
 * Prints a banner to stdout and forwards all arguments unchanged to
 * dwt_cuda::fdwt97.
 */
inline void fdwt(float *in, float *out, int width, int height, int levels)
{
    /* Announce which transform variant is about to run. */
    fputs(" Running fdwt97 Float \n", stdout);

    dwt_cuda::fdwt97(in, out, width, height, levels);
}
|
|
/*
|
|
inline void fdwt(float *in, float *out, int width, int height, int levels, float *diffOut)
|
|
{
|
|
dwt_cuda::fdwt97(in, out, width, height, levels, diffOut);
|
|
}
|
|
*/
|
|
|
|
|
|
|
|
/**
 * Forward DWT entry point for int data.
 *
 * Prints a banner to stdout and forwards all arguments unchanged to
 * dwt_cuda::fdwt53.
 */
inline void fdwt(int *in, int *out, int width, int height, int levels)
{
    /* Announce which transform variant is about to run. */
    fputs(" Running fdwt53 Int \n", stdout);

    dwt_cuda::fdwt53(in, out, width, height, levels);
}
|
|
/*
|
|
inline void fdwt(int *in, int *out, int width, int height, int levels, int *diffOut)
|
|
{
|
|
dwt_cuda::fdwt53(in, out, width, height, levels, diffOut);
|
|
}
|
|
*/
|
|
|
|
|
|
|
|
/**
 * Reverse DWT entry point for float data.
 *
 * Prints a banner to stdout and forwards all arguments unchanged to
 * dwt_cuda::rdwt97.
 */
inline void rdwt(float *in, float *out, int width, int height, int levels)
{
    /* Announce which transform variant is about to run. */
    fputs(" Running rdwt97 Float \n", stdout);

    dwt_cuda::rdwt97(in, out, width, height, levels);
}
|
|
|
|
/**
 * Reverse DWT entry point for int data.
 *
 * Prints a banner to stdout and forwards all arguments unchanged to
 * dwt_cuda::rdwt53.
 */
inline void rdwt(int *in, int *out, int width, int height, int levels)
{
    /* Announce which transform variant is about to run. */
    fputs(" Running rdwt53 Int \n", stdout);

    dwt_cuda::rdwt53(in, out, width, height, levels);
}
|
|
|
|
/**
 * Run a multi-stage 2D DWT, forward or reverse, on one image component.
 *
 * The input is first copied into `backup` with a device-to-device copy, then
 * the overloaded fdwt()/rdwt() wrapper matching T is invoked once.
 *
 * @param in        input samples (copied device-to-device, so a device pointer)
 * @param out       output buffer handed to fdwt()/rdwt()
 * @param backup    device buffer of at least pixWidth*pixHeight elements that
 *                  receives a copy of `in`
 * @param pixWidth  image width in samples
 * @param pixHeight image height in samples
 * @param stages    number of DWT levels passed to the transform
 * @param forward   true selects fdwt(), false selects rdwt()
 * @return always 0
 */
template<typename T>
int nStage2dDWT(T * in, T * out, T * backup, int pixWidth, int pixHeight, int stages, bool forward)
{
    /* Name the direction that is actually executed. */
    printf("\n*** %d stages of 2D %s DWT:\n", stages, forward ? "forward" : "reverse");

    /* Create a backup of the input, because each test iteration overwrites it.
     * The byte count is computed in size_t so large images cannot overflow int. */
    const size_t size = (size_t)pixHeight * (size_t)pixWidth * sizeof(T);
    cudaMemcpy(backup, in, size, cudaMemcpyDeviceToDevice);
    cudaCheckError("Memcopy device to device");

    if (forward)
        fdwt(in, out, pixWidth, pixHeight, stages);
    else
        rdwt(in, out, pixWidth, pixHeight, stages);

    /* Disabled benchmark loop and asynchronous error check, retained for reference.
     * NOTE(review): with this block disabled, errors raised by the DWT kernels
     * are not checked here - confirm that this is intended. */
#if 0
#ifdef GPU_DWT_TESTING_1

    dwt_cuda::CudaDWTTester tester;
    for(int i = tester.getNumIterations(); i--; ) {
        // Recover input and measure one overall DWT run.
        cudaMemcpy(in, backup, size, cudaMemcpyDeviceToDevice);
        cudaCheckError("Memcopy device to device");
        tester.beginTestIteration();
        if(forward)
            fdwt(in, out, pixWidth, pixHeight, stages);
        else
            rdwt(in, out, pixWidth, pixHeight, stages);
        tester.endTestIteration();
    }
    tester.showPerformance(" Overall DWT", pixWidth, pixHeight);
#endif // GPU_DWT_TESTING

    cudaCheckAsyncError("DWT Kernel calls");
#endif

    return 0;
}

/* Explicit instantiations for the two supported sample types. */
template int nStage2dDWT<float>(float*, float*, float*, int, int, int, bool);
template int nStage2dDWT<int>(int*, int*, int*, int, int, int, bool);
|
|
|
|
|
|
|
|
/*
|
|
template<typename T>
|
|
int nStage2dDWT(T * in, T * out, T * backup, int pixWidth, int pixHeight, int stages, bool forward, T * diffOut)
|
|
{
|
|
printf("*** %d stages of 2D forward DWT:\n", stages);
|
|
|
|
// create backup of input, because each test iteration overwrites it
|
|
const int size = pixHeight * pixWidth * sizeof(T);
|
|
cudaMemcpy(backup, in, size, cudaMemcpyDeviceToDevice);
|
|
cudaCheckError("Memcopy device to device");
|
|
|
|
// Measure time of individual levels.
|
|
if(forward)
|
|
fdwt(in, out, pixWidth, pixHeight, stages, diffOut);
|
|
else
|
|
rdwt(in, out, pixWidth, pixHeight, stages);
|
|
|
|
// Measure overall time of DWT.
|
|
#ifdef GPU_DWT_TESTING_1
|
|
|
|
dwt_cuda::CudaDWTTester tester;
|
|
for(int i = tester.getNumIterations(); i--; ) {
|
|
// Recover input and measure one overall DWT run.
|
|
cudaMemcpy(in, backup, size, cudaMemcpyDeviceToDevice);
|
|
cudaCheckError("Memcopy device to device");
|
|
tester.beginTestIteration();
|
|
if(forward)
|
|
fdwt(in, out, pixWidth, pixHeight, stages, diffOut);
|
|
else
|
|
rdwt(in, out, pixWidth, pixHeight, stages);
|
|
tester.endTestIteration();
|
|
}
|
|
tester.showPerformance(" Overall DWT", pixWidth, pixHeight);
|
|
#endif // GPU_DWT_TESTING
|
|
|
|
cudaCheckAsyncError("DWT Kernel calls");
|
|
return 0;
|
|
}
|
|
template int nStage2dDWT<float>(float*, float*, float*, int, int, int, bool, float*);
|
|
template int nStage2dDWT<int>(int*, int*, int*, int, int, int, bool, int*);
|
|
|
|
*/
|
|
|
|
/**
 * Convert float samples to 8-bit values and dump them to a text file.
 *
 * Each sample is mapped to (src[i] + 0.5) * 255, clamped to [0, 255] and
 * truncated into dst[i]. The clamped, untruncated value is also written as
 * "index: <i> val: <value> " (one line per sample) to "<filename>.txt".
 *
 * @param dst        output buffer, at least samplesNum bytes
 * @param src        input samples, at least samplesNum elements
 * @param samplesNum number of samples to convert
 * @param filename   base name of the dump file; ".txt" is appended
 */
void samplesToChar(unsigned char * dst, float * src, int samplesNum, const char * filename)
{
    /* std::string sizes itself for name + extension + terminator; a char
     * array of strlen(filename)+strlen(".txt") bytes is one byte too short. */
    const std::string outfile = std::string(filename) + ".txt";
    std::ofstream outputFile(outfile.c_str());

    for (int i = 0; i < samplesNum; i++) {
        float r = (src[i] + 0.5f) * 255;
        if (r > 255) r = 255;
        if (r < 0) r = 0;
        dst[i] = (unsigned char)r;
        outputFile << "index: " << i << " val: " << r << " \n";
    }

    outputFile.close();
}
|
|
|
|
/**
 * Convert int samples to 8-bit values and dump them to a text file.
 *
 * Each sample is shifted by +128, clamped to [0, 255] and stored in dst[i].
 * The clamped value is also written as "index: <i> val: <value> " (one line
 * per sample) to "<filename>.txt" so the output can be checked by hand.
 *
 * @param dst        output buffer, at least samplesNum bytes
 * @param src        input samples, at least samplesNum elements
 * @param samplesNum number of samples to convert
 * @param filename   base name of the dump file; ".txt" is appended
 */
void samplesToChar(unsigned char * dst, int * src, int samplesNum, const char * filename)
{
    /* std::string sizes itself for name + extension + terminator; a char
     * array of strlen(filename)+strlen(".txt") bytes is one byte too short. */
    const std::string outfile = std::string(filename) + ".txt";
    std::ofstream outputFile(outfile.c_str());

    for (int i = 0; i < samplesNum; i++) {
        int r = src[i] + 128;
        if (r > 255) r = 255;
        if (r < 0) r = 0;
        dst[i] = (unsigned char)r;
        /* Text dump used to check the output. */
        outputFile << "index: " << i << " val: " << r << " \n";
    }

    outputFile.close();
}
|
|
|
|
/**
 * Write a device-resident component to disk in linear (storage) order.
 *
 * The component is copied to a pinned host buffer, converted to 8-bit samples
 * with samplesToChar() (which also writes "<filename>.txt"), and the bytes
 * are written to "<filename><suffix>". An existing file is truncated first.
 *
 * @param component_cuda device pointer to pixWidth*pixHeight samples
 * @param pixWidth       component width in samples
 * @param pixHeight      component height in samples
 * @param filename       base name of the output files
 * @param suffix         text appended to filename for the binary output
 * @return 0 on success, -1 if the arguments are invalid, a buffer cannot be
 *         allocated or the output file cannot be opened, 1 if writing fails
 */
template<typename T>
int writeLinear(T *component_cuda, int pixWidth, int pixHeight,
                const char * filename, const char * suffix)
{
    if (pixWidth <= 0 || pixHeight <= 0) {
        error(0, 0, "invalid component size %d x %d", pixWidth, pixHeight);
        return -1;
    }

    const int samplesNum = pixWidth * pixHeight;
    const size_t size = (size_t)samplesNum * sizeof(T);

    /* std::string leaves room for the terminator; a char array of
     * strlen(filename)+strlen(suffix) bytes is one byte too short. */
    const std::string outfile = std::string(filename) + suffix;

    unsigned char *result = (unsigned char *)malloc(samplesNum);
    if (result == NULL) {
        error(0, errno, "cannot allocate %d bytes", samplesNum);
        return -1;
    }

    T *gpu_output = NULL;
    cudaMallocHost((void **)&gpu_output, size);
    cudaCheckError("Malloc host");
    if (gpu_output == NULL) {
        free(result);
        return -1;
    }
    memset(gpu_output, 0, size);
    cudaMemcpy(gpu_output, component_cuda, size, cudaMemcpyDeviceToHost);
    cudaCheckError("Memcopy device to host");

    /* T to char */
    samplesToChar(result, gpu_output, samplesNum, filename);

    /* Write component */
    int status = 0;
    const int fd = open(outfile.c_str(), O_CREAT|O_WRONLY|O_TRUNC, 0644);
    if (fd == -1) {
        error(0, errno, "cannot access %s", outfile.c_str());
        status = -1;
    } else {
        printf("\nWriting to %s (%d x %d)\n", outfile.c_str(), pixWidth, pixHeight);

        /* write() may transfer fewer bytes than requested; loop until done. */
        size_t done = 0;
        while (done < (size_t)samplesNum) {
            const ssize_t n = write(fd, result + done, (size_t)samplesNum - done);
            if (n < 0 && errno == EINTR)
                continue;
            if (n <= 0) {
                error(0, n < 0 ? errno : 0, "cannot write %s", outfile.c_str());
                status = 1;
                break;
            }
            done += (size_t)n;
        }
        if (close(fd) == -1 && status == 0) {
            error(0, errno, "cannot close %s", outfile.c_str());
            status = 1;
        }
    }

    /* Clean up on every path, including the failed-open one. */
    cudaFreeHost(gpu_output);
    cudaCheckError("Cuda free host memory");
    free(result);

    return status;
}

/* Explicit instantiations for the two supported sample types. */
template int writeLinear<float>(float *component_cuda, int pixWidth, int pixHeight, const char * filename, const char * suffix);
template int writeLinear<int>(int *component_cuda, int pixWidth, int pixHeight, const char * filename, const char * suffix);
|
|
|
|
/**
 * Write a multi-stage DWT result to disk in visual (sub-band quadrant) order.
 *
 * The device buffer is read as the coarsest LL band followed, for each stage
 * from coarsest to finest, by that stage's HL, LH and HH bands, each stored
 * row by row. The bands are rearranged into a pixWidth x pixHeight image with
 * LL top-left, HL to its right, LH below it and HH diagonally opposite. The
 * image is then converted to 8-bit samples with samplesToChar() (which also
 * writes "<filename>.txt") and written to "<filename><suffix>". An existing
 * file is truncated first.
 *
 * @param component_cuda device pointer to pixWidth*pixHeight coefficients
 * @param pixWidth       image width in samples
 * @param pixHeight      image height in samples
 * @param stages         number of DWT stages contained in the data (>= 1)
 * @param filename       base name of the output files
 * @param suffix         text appended to filename for the binary output
 * @return 0 on success, -1 if the arguments are invalid, a buffer cannot be
 *         allocated or the output file cannot be opened, 1 if writing fails
 */
template<typename T>
int writeNStage2DDWT(T *component_cuda, int pixWidth, int pixHeight,
                     int stages, const char * filename, const char * suffix)
{
    struct band {
        int dimX;
        int dimY;
    };
    struct dimensions {
        struct band LL;
        struct band HL;
        struct band LH;
        struct band HH;
    };

    /* bandDims[stages-1] is dereferenced below, so at least one stage is required. */
    if (stages < 1 || pixWidth <= 0 || pixHeight <= 0) {
        error(0, 0, "invalid DWT layout: %d x %d, %d stages", pixWidth, pixHeight, stages);
        return -1;
    }

    const int samplesNum = pixWidth * pixHeight;
    const size_t bytes = (size_t)samplesNum * sizeof(T);

    /* std::string leaves room for the terminator; a char array of
     * strlen(filename)+strlen(suffix) bytes is one byte too short. */
    const std::string outfile = std::string(filename) + suffix;

    struct dimensions *bandDims =
        (struct dimensions *)malloc((size_t)stages * sizeof(struct dimensions));
    T *dst = (T *)malloc(bytes);
    unsigned char *result = (unsigned char *)malloc(samplesNum);
    if (bandDims == NULL || dst == NULL || result == NULL) {
        error(0, errno, "cannot allocate host buffers");
        free(bandDims);
        free(dst);
        free(result);
        return -1;
    }

    /* Each stage splits the previous stage's LL band (the full image for
     * stage 0) into four sub-bands. */
    int parentW = pixWidth;
    int parentH = pixHeight;
    for (int s = 0; s < stages; s++) {
        struct dimensions *d = &bandDims[s];
        d->LL.dimX = DIVANDRND(parentW, 2);
        d->LL.dimY = DIVANDRND(parentH, 2);
        d->HL.dimX = parentW - d->LL.dimX;
        d->HL.dimY = d->LL.dimY;
        d->LH.dimX = d->LL.dimX;
        d->LH.dimY = parentH - d->LL.dimY;
        d->HH.dimX = d->HL.dimX;
        d->HH.dimY = d->LH.dimY;
        parentW = d->LL.dimX;
        parentH = d->LL.dimY;
    }

#if 0
    printf("Original image pixWidth x pixHeight: %d x %d\n", pixWidth, pixHeight);
    for (int i = 0; i < stages; i++) {
        printf("Stage %d: LL: pixWidth x pixHeight: %d x %d\n", i, bandDims[i].LL.dimX, bandDims[i].LL.dimY);
        printf("Stage %d: HL: pixWidth x pixHeight: %d x %d\n", i, bandDims[i].HL.dimX, bandDims[i].HL.dimY);
        printf("Stage %d: LH: pixWidth x pixHeight: %d x %d\n", i, bandDims[i].LH.dimX, bandDims[i].LH.dimY);
        printf("Stage %d: HH: pixWidth x pixHeight: %d x %d\n", i, bandDims[i].HH.dimX, bandDims[i].HH.dimY);
    }
#endif

    T *src = NULL;
    cudaMallocHost((void **)&src, bytes);
    cudaCheckError("Malloc host");
    if (src == NULL) {
        free(bandDims);
        free(dst);
        free(result);
        return -1;
    }
    memset(src, 0, bytes);
    memset(dst, 0, bytes);
    cudaMemcpy(src, component_cuda, bytes, cudaMemcpyDeviceToHost);
    cudaCheckError("Memcopy device to host");

    // LL band of the coarsest stage goes to the top-left corner.
    const struct band coarseLL = bandDims[stages-1].LL;
    for (int row = 0; row < coarseLL.dimY; row++) {
        memcpy(dst + (size_t)row * pixWidth,
               src + (size_t)row * coarseLL.dimX,
               coarseLL.dimX * sizeof(T));
    }

    for (int s = stages - 1; s >= 0; s--) {
        const struct dimensions *d = &bandDims[s];

        // HL band: to the right of LL, same rows.
        size_t offset = (size_t)d->LL.dimX * d->LL.dimY;
        for (int row = 0; row < d->HL.dimY; row++) {
            memcpy(dst + (size_t)row * pixWidth + d->LL.dimX,
                   src + offset + (size_t)row * d->HL.dimX,
                   d->HL.dimX * sizeof(T));
        }

        // LH band: below LL. The row count is LH.dimY, which is smaller than
        // HL.dimY when the parent height is odd; iterating HL.dimY rows would
        // read HH data and write past the last image row.
        offset += (size_t)d->HL.dimX * d->HL.dimY;
        for (int row = 0; row < d->LH.dimY; row++) {
            memcpy(dst + (size_t)(d->LL.dimY + row) * pixWidth,
                   src + offset + (size_t)row * d->LH.dimX,
                   d->LH.dimX * sizeof(T));
        }

        // HH band: below HL and to the right of LH.
        offset += (size_t)d->LH.dimX * d->LH.dimY;
        for (int row = 0; row < d->HH.dimY; row++) {
            memcpy(dst + (size_t)(d->LL.dimY + row) * pixWidth + d->LL.dimX,
                   src + offset + (size_t)row * d->HH.dimX,
                   d->HH.dimX * sizeof(T));
        }
    }

    /* Write component */
    samplesToChar(result, dst, samplesNum, filename);

    int status = 0;
    const int fd = open(outfile.c_str(), O_CREAT|O_WRONLY|O_TRUNC, 0644);
    if (fd == -1) {
        error(0, errno, "cannot access %s", outfile.c_str());
        status = -1;
    } else {
        printf("\nWriting to %s (%d x %d)\n", outfile.c_str(), pixWidth, pixHeight);

        /* write() may transfer fewer bytes than requested; loop until done. */
        size_t done = 0;
        while (done < (size_t)samplesNum) {
            const ssize_t n = write(fd, result + done, (size_t)samplesNum - done);
            if (n < 0 && errno == EINTR)
                continue;
            if (n <= 0) {
                error(0, n < 0 ? errno : 0, "cannot write %s", outfile.c_str());
                status = 1;
                break;
            }
            done += (size_t)n;
        }
        if (close(fd) == -1 && status == 0) {
            error(0, errno, "cannot close %s", outfile.c_str());
            status = 1;
        }
    }

    /* Clean up on every path, including the failed-open one. */
    cudaFreeHost(src);
    cudaCheckError("Cuda free host memory");
    free(dst);
    free(result);
    free(bandDims);

    return status;
}

/* Explicit instantiations for the two supported sample types. */
template int writeNStage2DDWT<float>(float *component_cuda, int pixWidth, int pixHeight, int stages, const char * filename, const char * suffix);
template int writeNStage2DDWT<int>(int *component_cuda, int pixWidth, int pixHeight, int stages, const char * filename, const char * suffix);
|