1240 lines
45 KiB
Plaintext
Executable File
1240 lines
45 KiB
Plaintext
Executable File
__global__ void kernel() {
|
|
|
|
//======================================================================================================================================================
|
|
// COMMON VARIABLES
|
|
//======================================================================================================================================================
|
|
|
|
fp *d_in;
|
|
int rot_row;
|
|
int rot_col;
|
|
int in2_rowlow;
|
|
int in2_collow;
|
|
int ic;
|
|
int jc;
|
|
int jp1;
|
|
int ja1, ja2;
|
|
int ip1;
|
|
int ia1, ia2;
|
|
int ja, jb;
|
|
int ia, ib;
|
|
float s;
|
|
int i;
|
|
int j;
|
|
int row;
|
|
int col;
|
|
int ori_row;
|
|
int ori_col;
|
|
int position;
|
|
float sum;
|
|
int pos_ori;
|
|
float temp;
|
|
float temp2;
|
|
int location;
|
|
int cent;
|
|
int tMask_row;
|
|
int tMask_col;
|
|
float largest_value_current = 0;
|
|
float largest_value = 0;
|
|
int largest_coordinate_current = 0;
|
|
int largest_coordinate = 0;
|
|
float fin_max_val = 0;
|
|
int fin_max_coo = 0;
|
|
int largest_row;
|
|
int largest_col;
|
|
int offset_row;
|
|
int offset_col;
|
|
float in_partial_sum[51]; // WATCH THIS !!! HARDCODED VALUE
|
|
float in_sqr_partial_sum[51]; // WATCH THIS !!! HARDCODED VALUE
|
|
float in_final_sum;
|
|
float in_sqr_final_sum;
|
|
float mean;
|
|
float mean_sqr;
|
|
float variance;
|
|
float deviation;
|
|
float denomT;
|
|
float par_max_val[131]; // WATCH THIS !!! HARDCODED VALUE
|
|
int par_max_coo[131]; // WATCH THIS !!! HARDCODED VALUE
|
|
int pointer;
|
|
float d_in_mod_temp[2601];
|
|
int ori_pointer;
|
|
int loc_pointer;
|
|
|
|
//======================================================================================================================================================
|
|
// THREAD PARAMETERS
|
|
//======================================================================================================================================================
|
|
|
|
int bx = blockIdx.x; // get current horizontal block index (0-n)
|
|
int tx = threadIdx.x; // get current horizontal thread index (0-n)
|
|
int ei_new = tx;
|
|
|
|
//===============================================================================================================================================================================================================
|
|
//===============================================================================================================================================================================================================
|
|
// GENERATE TEMPLATE
|
|
//===============================================================================================================================================================================================================
|
|
//===============================================================================================================================================================================================================
|
|
printf("phase1\n");
|
|
// generate templates based on the first frame only
|
|
//======================================================================================================================================================
|
|
// GET POINTER TO TEMPLATE FOR THE POINT
|
|
//======================================================================================================================================================
|
|
|
|
// pointers to: current template for current point
|
|
d_in = &d_unique[bx].d_T[d_unique[bx].in_pointer];
|
|
|
|
//===============================================================================================================================================================================================================
|
|
//===============================================================================================================================================================================================================
|
|
// PROCESS POINTS
|
|
//===============================================================================================================================================================================================================
|
|
//===============================================================================================================================================================================================================
|
|
printf("phase2\n");
|
|
// process points in all frames except for the first one
|
|
|
|
//======================================================================================================================================================
|
|
// SELECTION
|
|
//======================================================================================================================================================
|
|
|
|
in2_rowlow =
|
|
d_unique[bx].d_Row[d_unique[bx].point_no] - d_common.sSize; // (1 to n+1)
|
|
in2_collow = d_unique[bx].d_Col[d_unique[bx].point_no] - d_common.sSize;
|
|
|
|
// work
|
|
ei_new = tx;
|
|
while (ei_new < d_common.in2_elem) {
|
|
|
|
// figure out row/col location in new matrix
|
|
row = (ei_new + 1) % d_common.in2_rows - 1; // (0-n) row
|
|
col = (ei_new + 1) / d_common.in2_rows + 1 - 1; // (0-n) column
|
|
if ((ei_new + 1) % d_common.in2_rows == 0) {
|
|
row = d_common.in2_rows - 1;
|
|
col = col - 1;
|
|
}
|
|
|
|
// figure out corresponding location in old matrix and copy values to new
|
|
// matrix
|
|
ori_row = row + in2_rowlow - 1;
|
|
ori_col = col + in2_collow - 1;
|
|
d_unique[bx].d_in2[ei_new] =
|
|
d_common_change.d_frame[ori_col * d_common.frame_rows + ori_row];
|
|
|
|
// go for second round
|
|
ei_new = ei_new + NUMBER_THREADS;
|
|
}
|
|
|
|
//======================================================================================================================================================
|
|
// SYNCHRONIZE THREADS
|
|
//======================================================================================================================================================
|
|
|
|
__syncthreads();
|
|
|
|
//======================================================================================================================================================
|
|
// CONVOLUTION
|
|
//======================================================================================================================================================
|
|
|
|
//====================================================================================================
|
|
// ROTATION
|
|
//====================================================================================================
|
|
|
|
// variables
|
|
d_in = &d_unique[bx].d_T[d_unique[bx].in_pointer];
|
|
|
|
// work
|
|
ei_new = tx;
|
|
while (ei_new < d_common.in_elem) {
|
|
|
|
// figure out row/col location in padded array
|
|
row = (ei_new + 1) % d_common.in_rows - 1; // (0-n) row
|
|
col = (ei_new + 1) / d_common.in_rows + 1 - 1; // (0-n) column
|
|
if ((ei_new + 1) % d_common.in_rows == 0) {
|
|
row = d_common.in_rows - 1;
|
|
col = col - 1;
|
|
}
|
|
|
|
// execution
|
|
rot_row = (d_common.in_rows - 1) - row;
|
|
rot_col = (d_common.in_rows - 1) - col;
|
|
d_in_mod_temp[ei_new] = d_in[rot_col * d_common.in_rows + rot_row];
|
|
|
|
// go for second round
|
|
ei_new = ei_new + NUMBER_THREADS;
|
|
}
|
|
|
|
//====================================================================================================
|
|
// SYNCHRONIZE THREADS
|
|
//====================================================================================================
|
|
|
|
__syncthreads();
|
|
|
|
//====================================================================================================
|
|
// ACTUAL CONVOLUTION
|
|
//====================================================================================================
|
|
|
|
// work
|
|
ei_new = tx;
|
|
while (ei_new < d_common.conv_elem) {
|
|
|
|
// figure out row/col location in array
|
|
ic = (ei_new + 1) % d_common.conv_rows; // (1-n)
|
|
jc = (ei_new + 1) / d_common.conv_rows + 1; // (1-n)
|
|
if ((ei_new + 1) % d_common.conv_rows == 0) {
|
|
ic = d_common.conv_rows;
|
|
jc = jc - 1;
|
|
}
|
|
|
|
//
|
|
j = jc + d_common.joffset;
|
|
jp1 = j + 1;
|
|
if (d_common.in2_cols < jp1) {
|
|
ja1 = jp1 - d_common.in2_cols;
|
|
} else {
|
|
ja1 = 1;
|
|
}
|
|
if (d_common.in_cols < j) {
|
|
ja2 = d_common.in_cols;
|
|
} else {
|
|
ja2 = j;
|
|
}
|
|
|
|
i = ic + d_common.ioffset;
|
|
ip1 = i + 1;
|
|
|
|
if (d_common.in2_rows < ip1) {
|
|
ia1 = ip1 - d_common.in2_rows;
|
|
} else {
|
|
ia1 = 1;
|
|
}
|
|
if (d_common.in_rows < i) {
|
|
ia2 = d_common.in_rows;
|
|
} else {
|
|
ia2 = i;
|
|
}
|
|
|
|
s = 0;
|
|
|
|
for (ja = ja1; ja <= ja2; ja++) {
|
|
jb = jp1 - ja;
|
|
for (ia = ia1; ia <= ia2; ia++) {
|
|
ib = ip1 - ia;
|
|
s = s + d_in_mod_temp[d_common.in_rows * (ja - 1) + ia - 1] *
|
|
d_unique[bx].d_in2[d_common.in2_rows * (jb - 1) + ib - 1];
|
|
}
|
|
}
|
|
|
|
// d_unique[bx].d_conv[d_common.conv_rows*(jc-1)+ic-1] = s;
|
|
d_unique[bx].d_conv[ei_new] = s;
|
|
|
|
// go for second round
|
|
ei_new = ei_new + NUMBER_THREADS;
|
|
}
|
|
|
|
//======================================================================================================================================================
|
|
// SYNCHRONIZE THREADS
|
|
//======================================================================================================================================================
|
|
|
|
__syncthreads();
|
|
|
|
//======================================================================================================================================================
|
|
// CUMULATIVE SUM
|
|
//======================================================================================================================================================
|
|
|
|
//====================================================================================================
|
|
// PAD ARRAY, VERTICAL CUMULATIVE SUM
|
|
//====================================================================================================
|
|
|
|
//==================================================
|
|
// PADD ARRAY
|
|
//==================================================
|
|
|
|
// work
|
|
ei_new = tx;
|
|
while (ei_new < d_common.in2_pad_cumv_elem) {
|
|
|
|
// figure out row/col location in padded array
|
|
row = (ei_new + 1) % d_common.in2_pad_cumv_rows - 1; // (0-n) row
|
|
col = (ei_new + 1) / d_common.in2_pad_cumv_rows + 1 - 1; // (0-n) column
|
|
if ((ei_new + 1) % d_common.in2_pad_cumv_rows == 0) {
|
|
row = d_common.in2_pad_cumv_rows - 1;
|
|
col = col - 1;
|
|
}
|
|
|
|
// execution
|
|
if (row > (d_common.in2_pad_add_rows -
|
|
1) && // do if has numbers in original array
|
|
row < (d_common.in2_pad_add_rows + d_common.in2_rows) &&
|
|
col > (d_common.in2_pad_add_cols - 1) &&
|
|
col < (d_common.in2_pad_add_cols + d_common.in2_cols)) {
|
|
ori_row = row - d_common.in2_pad_add_rows;
|
|
ori_col = col - d_common.in2_pad_add_cols;
|
|
d_unique[bx].d_in2_pad_cumv[ei_new] =
|
|
d_unique[bx].d_in2[ori_col * d_common.in2_rows + ori_row];
|
|
} else { // do if otherwise
|
|
d_unique[bx].d_in2_pad_cumv[ei_new] = 0;
|
|
}
|
|
|
|
// go for second round
|
|
ei_new = ei_new + NUMBER_THREADS;
|
|
}
|
|
|
|
//==================================================
|
|
// SYNCHRONIZE THREADS
|
|
//==================================================
|
|
|
|
__syncthreads();
|
|
|
|
//==================================================
|
|
// VERTICAL CUMULATIVE SUM
|
|
//==================================================
|
|
|
|
// work
|
|
ei_new = tx;
|
|
while (ei_new < d_common.in2_pad_cumv_cols) {
|
|
|
|
// figure out column position
|
|
pos_ori = ei_new * d_common.in2_pad_cumv_rows;
|
|
|
|
// variables
|
|
sum = 0;
|
|
|
|
// loop through all rows
|
|
for (position = pos_ori; position < pos_ori + d_common.in2_pad_cumv_rows;
|
|
position = position + 1) {
|
|
d_unique[bx].d_in2_pad_cumv[position] =
|
|
d_unique[bx].d_in2_pad_cumv[position] + sum;
|
|
sum = d_unique[bx].d_in2_pad_cumv[position];
|
|
}
|
|
|
|
// go for second round
|
|
ei_new = ei_new + NUMBER_THREADS;
|
|
}
|
|
|
|
//====================================================================================================
|
|
// SYNCHRONIZE THREADS
|
|
//====================================================================================================
|
|
|
|
__syncthreads();
|
|
|
|
//====================================================================================================
|
|
// SELECTION
|
|
//====================================================================================================
|
|
|
|
// work
|
|
ei_new = tx;
|
|
while (ei_new < d_common.in2_pad_cumv_sel_elem) {
|
|
|
|
// figure out row/col location in new matrix
|
|
row = (ei_new + 1) % d_common.in2_pad_cumv_sel_rows - 1; // (0-n) row
|
|
col = (ei_new + 1) / d_common.in2_pad_cumv_sel_rows + 1 - 1; // (0-n) column
|
|
if ((ei_new + 1) % d_common.in2_pad_cumv_sel_rows == 0) {
|
|
row = d_common.in2_pad_cumv_sel_rows - 1;
|
|
col = col - 1;
|
|
}
|
|
|
|
// figure out corresponding location in old matrix and copy values to new
|
|
// matrix
|
|
ori_row = row + d_common.in2_pad_cumv_sel_rowlow - 1;
|
|
ori_col = col + d_common.in2_pad_cumv_sel_collow - 1;
|
|
d_unique[bx].d_in2_pad_cumv_sel[ei_new] =
|
|
d_unique[bx]
|
|
.d_in2_pad_cumv[ori_col * d_common.in2_pad_cumv_rows + ori_row];
|
|
|
|
// go for second round
|
|
ei_new = ei_new + NUMBER_THREADS;
|
|
}
|
|
|
|
//====================================================================================================
|
|
// SYNCHRONIZE THREADS
|
|
//====================================================================================================
|
|
|
|
__syncthreads();
|
|
|
|
//====================================================================================================
|
|
// SELECTION 2, SUBTRACTION, HORIZONTAL CUMULATIVE SUM
|
|
//====================================================================================================
|
|
|
|
//==================================================
|
|
// SELECTION 2
|
|
//==================================================
|
|
|
|
// work
|
|
ei_new = tx;
|
|
while (ei_new < d_common.in2_sub_cumh_elem) {
|
|
|
|
// figure out row/col location in new matrix
|
|
row = (ei_new + 1) % d_common.in2_sub_cumh_rows - 1; // (0-n) row
|
|
col = (ei_new + 1) / d_common.in2_sub_cumh_rows + 1 - 1; // (0-n) column
|
|
if ((ei_new + 1) % d_common.in2_sub_cumh_rows == 0) {
|
|
row = d_common.in2_sub_cumh_rows - 1;
|
|
col = col - 1;
|
|
}
|
|
|
|
// figure out corresponding location in old matrix and copy values to new
|
|
// matrix
|
|
ori_row = row + d_common.in2_pad_cumv_sel2_rowlow - 1;
|
|
ori_col = col + d_common.in2_pad_cumv_sel2_collow - 1;
|
|
d_unique[bx].d_in2_sub_cumh[ei_new] =
|
|
d_unique[bx]
|
|
.d_in2_pad_cumv[ori_col * d_common.in2_pad_cumv_rows + ori_row];
|
|
|
|
// go for second round
|
|
ei_new = ei_new + NUMBER_THREADS;
|
|
}
|
|
|
|
//==================================================
|
|
// SYNCHRONIZE THREADS
|
|
//==================================================
|
|
|
|
__syncthreads();
|
|
|
|
//==================================================
|
|
// SUBTRACTION
|
|
//==================================================
|
|
|
|
// work
|
|
ei_new = tx;
|
|
while (ei_new < d_common.in2_sub_cumh_elem) {
|
|
|
|
// subtract
|
|
d_unique[bx].d_in2_sub_cumh[ei_new] =
|
|
d_unique[bx].d_in2_pad_cumv_sel[ei_new] -
|
|
d_unique[bx].d_in2_sub_cumh[ei_new];
|
|
|
|
// go for second round
|
|
ei_new = ei_new + NUMBER_THREADS;
|
|
}
|
|
|
|
//==================================================
|
|
// SYNCHRONIZE THREADS
|
|
//==================================================
|
|
|
|
__syncthreads();
|
|
|
|
//==================================================
|
|
// HORIZONTAL CUMULATIVE SUM
|
|
//==================================================
|
|
|
|
// work
|
|
ei_new = tx;
|
|
while (ei_new < d_common.in2_sub_cumh_rows) {
|
|
|
|
// figure out row position
|
|
pos_ori = ei_new;
|
|
|
|
// variables
|
|
sum = 0;
|
|
|
|
// loop through all rows
|
|
for (position = pos_ori; position < pos_ori + d_common.in2_sub_cumh_elem;
|
|
position = position + d_common.in2_sub_cumh_rows) {
|
|
d_unique[bx].d_in2_sub_cumh[position] =
|
|
d_unique[bx].d_in2_sub_cumh[position] + sum;
|
|
sum = d_unique[bx].d_in2_sub_cumh[position];
|
|
}
|
|
|
|
// go for second round
|
|
ei_new = ei_new + NUMBER_THREADS;
|
|
}
|
|
|
|
//====================================================================================================
|
|
// SYNCHRONIZE THREADS
|
|
//====================================================================================================
|
|
|
|
__syncthreads();
|
|
|
|
//====================================================================================================
|
|
// SELECTION
|
|
//====================================================================================================
|
|
|
|
// work
|
|
ei_new = tx;
|
|
while (ei_new < d_common.in2_sub_cumh_sel_elem) {
|
|
|
|
// figure out row/col location in new matrix
|
|
row = (ei_new + 1) % d_common.in2_sub_cumh_sel_rows - 1; // (0-n) row
|
|
col = (ei_new + 1) / d_common.in2_sub_cumh_sel_rows + 1 - 1; // (0-n) column
|
|
if ((ei_new + 1) % d_common.in2_sub_cumh_sel_rows == 0) {
|
|
row = d_common.in2_sub_cumh_sel_rows - 1;
|
|
col = col - 1;
|
|
}
|
|
|
|
// figure out corresponding location in old matrix and copy values to new
|
|
// matrix
|
|
ori_row = row + d_common.in2_sub_cumh_sel_rowlow - 1;
|
|
ori_col = col + d_common.in2_sub_cumh_sel_collow - 1;
|
|
d_unique[bx].d_in2_sub_cumh_sel[ei_new] =
|
|
d_unique[bx]
|
|
.d_in2_sub_cumh[ori_col * d_common.in2_sub_cumh_rows + ori_row];
|
|
|
|
// go for second round
|
|
ei_new = ei_new + NUMBER_THREADS;
|
|
}
|
|
|
|
//====================================================================================================
|
|
// SYNCHRONIZE THREADS
|
|
//====================================================================================================
|
|
|
|
__syncthreads();
|
|
|
|
//====================================================================================================
|
|
// SELECTION 2, SUBTRACTION
|
|
//====================================================================================================
|
|
|
|
//==================================================
|
|
// SELECTION 2
|
|
//==================================================
|
|
|
|
// work
|
|
ei_new = tx;
|
|
while (ei_new < d_common.in2_sub2_elem) {
|
|
|
|
// figure out row/col location in new matrix
|
|
row = (ei_new + 1) % d_common.in2_sub2_rows - 1; // (0-n) row
|
|
col = (ei_new + 1) / d_common.in2_sub2_rows + 1 - 1; // (0-n) column
|
|
if ((ei_new + 1) % d_common.in2_sub2_rows == 0) {
|
|
row = d_common.in2_sub2_rows - 1;
|
|
col = col - 1;
|
|
}
|
|
|
|
// figure out corresponding location in old matrix and copy values to new
|
|
// matrix
|
|
ori_row = row + d_common.in2_sub_cumh_sel2_rowlow - 1;
|
|
ori_col = col + d_common.in2_sub_cumh_sel2_collow - 1;
|
|
d_unique[bx].d_in2_sub2[ei_new] =
|
|
d_unique[bx]
|
|
.d_in2_sub_cumh[ori_col * d_common.in2_sub_cumh_rows + ori_row];
|
|
|
|
// go for second round
|
|
ei_new = ei_new + NUMBER_THREADS;
|
|
}
|
|
|
|
//==================================================
|
|
// SYNCHRONIZE THREADS
|
|
//==================================================
|
|
|
|
__syncthreads();
|
|
|
|
//==================================================
|
|
// SUBTRACTION
|
|
//==================================================
|
|
|
|
// work
|
|
ei_new = tx;
|
|
while (ei_new < d_common.in2_sub2_elem) {
|
|
|
|
// subtract
|
|
d_unique[bx].d_in2_sub2[ei_new] = d_unique[bx].d_in2_sub_cumh_sel[ei_new] -
|
|
d_unique[bx].d_in2_sub2[ei_new];
|
|
|
|
// go for second round
|
|
ei_new = ei_new + NUMBER_THREADS;
|
|
}
|
|
|
|
//======================================================================================================================================================
|
|
// SYNCHRONIZE THREADS
|
|
//======================================================================================================================================================
|
|
|
|
__syncthreads();
|
|
|
|
//======================================================================================================================================================
|
|
// CUMULATIVE SUM 2
|
|
//======================================================================================================================================================
|
|
|
|
//====================================================================================================
|
|
// MULTIPLICATION
|
|
//====================================================================================================
|
|
|
|
// work
|
|
ei_new = tx;
|
|
while (ei_new < d_common.in2_sqr_elem) {
|
|
|
|
temp = d_unique[bx].d_in2[ei_new];
|
|
d_unique[bx].d_in2_sqr[ei_new] = temp * temp;
|
|
|
|
// go for second round
|
|
ei_new = ei_new + NUMBER_THREADS;
|
|
}
|
|
|
|
//====================================================================================================
|
|
// SYNCHRONIZE THREADS
|
|
//====================================================================================================
|
|
|
|
__syncthreads();
|
|
|
|
//====================================================================================================
|
|
// PAD ARRAY, VERTICAL CUMULATIVE SUM
|
|
//====================================================================================================
|
|
|
|
//==================================================
|
|
// PAD ARRAY
|
|
//==================================================
|
|
|
|
// work
|
|
ei_new = tx;
|
|
while (ei_new < d_common.in2_pad_cumv_elem) {
|
|
|
|
// figure out row/col location in padded array
|
|
row = (ei_new + 1) % d_common.in2_pad_cumv_rows - 1; // (0-n) row
|
|
col = (ei_new + 1) / d_common.in2_pad_cumv_rows + 1 - 1; // (0-n) column
|
|
if ((ei_new + 1) % d_common.in2_pad_cumv_rows == 0) {
|
|
row = d_common.in2_pad_cumv_rows - 1;
|
|
col = col - 1;
|
|
}
|
|
|
|
// execution
|
|
if (row > (d_common.in2_pad_add_rows -
|
|
1) && // do if has numbers in original array
|
|
row < (d_common.in2_pad_add_rows + d_common.in2_sqr_rows) &&
|
|
col > (d_common.in2_pad_add_cols - 1) &&
|
|
col < (d_common.in2_pad_add_cols + d_common.in2_sqr_cols)) {
|
|
ori_row = row - d_common.in2_pad_add_rows;
|
|
ori_col = col - d_common.in2_pad_add_cols;
|
|
d_unique[bx].d_in2_pad_cumv[ei_new] =
|
|
d_unique[bx].d_in2_sqr[ori_col * d_common.in2_sqr_rows + ori_row];
|
|
} else { // do if otherwise
|
|
d_unique[bx].d_in2_pad_cumv[ei_new] = 0;
|
|
}
|
|
|
|
// go for second round
|
|
ei_new = ei_new + NUMBER_THREADS;
|
|
}
|
|
|
|
//==================================================
|
|
// SYNCHRONIZE THREADS
|
|
//==================================================
|
|
|
|
__syncthreads();
|
|
|
|
//==================================================
|
|
// VERTICAL CUMULATIVE SUM
|
|
//==================================================
|
|
|
|
// work
|
|
ei_new = tx;
|
|
while (ei_new < d_common.in2_pad_cumv_cols) {
|
|
|
|
// figure out column position
|
|
pos_ori = ei_new * d_common.in2_pad_cumv_rows;
|
|
|
|
// variables
|
|
sum = 0;
|
|
|
|
// loop through all rows
|
|
for (position = pos_ori; position < pos_ori + d_common.in2_pad_cumv_rows;
|
|
position = position + 1) {
|
|
d_unique[bx].d_in2_pad_cumv[position] =
|
|
d_unique[bx].d_in2_pad_cumv[position] + sum;
|
|
sum = d_unique[bx].d_in2_pad_cumv[position];
|
|
}
|
|
|
|
// go for second round
|
|
ei_new = ei_new + NUMBER_THREADS;
|
|
}
|
|
|
|
//====================================================================================================
|
|
// SYNCHRONIZE THREADS
|
|
//====================================================================================================
|
|
|
|
__syncthreads();
|
|
|
|
//====================================================================================================
|
|
// SELECTION
|
|
//====================================================================================================
|
|
|
|
// work
|
|
ei_new = tx;
|
|
while (ei_new < d_common.in2_pad_cumv_sel_elem) {
|
|
|
|
// figure out row/col location in new matrix
|
|
row = (ei_new + 1) % d_common.in2_pad_cumv_sel_rows - 1; // (0-n) row
|
|
col = (ei_new + 1) / d_common.in2_pad_cumv_sel_rows + 1 - 1; // (0-n) column
|
|
if ((ei_new + 1) % d_common.in2_pad_cumv_sel_rows == 0) {
|
|
row = d_common.in2_pad_cumv_sel_rows - 1;
|
|
col = col - 1;
|
|
}
|
|
|
|
// figure out corresponding location in old matrix and copy values to new
|
|
// matrix
|
|
ori_row = row + d_common.in2_pad_cumv_sel_rowlow - 1;
|
|
ori_col = col + d_common.in2_pad_cumv_sel_collow - 1;
|
|
d_unique[bx].d_in2_pad_cumv_sel[ei_new] =
|
|
d_unique[bx]
|
|
.d_in2_pad_cumv[ori_col * d_common.in2_pad_cumv_rows + ori_row];
|
|
|
|
// go for second round
|
|
ei_new = ei_new + NUMBER_THREADS;
|
|
}
|
|
|
|
//====================================================================================================
|
|
// SYNCHRONIZE THREADS
|
|
//====================================================================================================
|
|
|
|
__syncthreads();
|
|
|
|
//====================================================================================================
|
|
// SELECTION 2, SUBTRACTION, HORIZONTAL CUMULATIVE SUM
|
|
//====================================================================================================
|
|
|
|
//==================================================
|
|
// SELECTION 2
|
|
//==================================================
|
|
|
|
// work
|
|
ei_new = tx;
|
|
while (ei_new < d_common.in2_sub_cumh_elem) {
|
|
|
|
// figure out row/col location in new matrix
|
|
row = (ei_new + 1) % d_common.in2_sub_cumh_rows - 1; // (0-n) row
|
|
col = (ei_new + 1) / d_common.in2_sub_cumh_rows + 1 - 1; // (0-n) column
|
|
if ((ei_new + 1) % d_common.in2_sub_cumh_rows == 0) {
|
|
row = d_common.in2_sub_cumh_rows - 1;
|
|
col = col - 1;
|
|
}
|
|
|
|
// figure out corresponding location in old matrix and copy values to new
|
|
// matrix
|
|
ori_row = row + d_common.in2_pad_cumv_sel2_rowlow - 1;
|
|
ori_col = col + d_common.in2_pad_cumv_sel2_collow - 1;
|
|
d_unique[bx].d_in2_sub_cumh[ei_new] =
|
|
d_unique[bx]
|
|
.d_in2_pad_cumv[ori_col * d_common.in2_pad_cumv_rows + ori_row];
|
|
|
|
// go for second round
|
|
ei_new = ei_new + NUMBER_THREADS;
|
|
}
|
|
|
|
//==================================================
|
|
// SYNCHRONIZE THREADS
|
|
//==================================================
|
|
|
|
__syncthreads();
|
|
|
|
//==================================================
|
|
// SUBTRACTION
|
|
//==================================================
|
|
|
|
// work
|
|
ei_new = tx;
|
|
while (ei_new < d_common.in2_sub_cumh_elem) {
|
|
|
|
// subtract
|
|
d_unique[bx].d_in2_sub_cumh[ei_new] =
|
|
d_unique[bx].d_in2_pad_cumv_sel[ei_new] -
|
|
d_unique[bx].d_in2_sub_cumh[ei_new];
|
|
|
|
// go for second round
|
|
ei_new = ei_new + NUMBER_THREADS;
|
|
}
|
|
|
|
//==================================================
|
|
// HORIZONTAL CUMULATIVE SUM
|
|
//==================================================
|
|
|
|
// work
|
|
ei_new = tx;
|
|
while (ei_new < d_common.in2_sub_cumh_rows) {
|
|
|
|
// figure out row position
|
|
pos_ori = ei_new;
|
|
|
|
// variables
|
|
sum = 0;
|
|
|
|
// loop through all rows
|
|
for (position = pos_ori; position < pos_ori + d_common.in2_sub_cumh_elem;
|
|
position = position + d_common.in2_sub_cumh_rows) {
|
|
d_unique[bx].d_in2_sub_cumh[position] =
|
|
d_unique[bx].d_in2_sub_cumh[position] + sum;
|
|
sum = d_unique[bx].d_in2_sub_cumh[position];
|
|
}
|
|
|
|
// go for second round
|
|
ei_new = ei_new + NUMBER_THREADS;
|
|
}
|
|
|
|
//====================================================================================================
|
|
// SYNCHRONIZE THREADS
|
|
//====================================================================================================
|
|
|
|
__syncthreads();
|
|
|
|
//====================================================================================================
|
|
// SELECTION
|
|
//====================================================================================================
|
|
|
|
// work
|
|
ei_new = tx;
|
|
while (ei_new < d_common.in2_sub_cumh_sel_elem) {
|
|
|
|
// figure out row/col location in new matrix
|
|
row = (ei_new + 1) % d_common.in2_sub_cumh_sel_rows - 1; // (0-n) row
|
|
col = (ei_new + 1) / d_common.in2_sub_cumh_sel_rows + 1 - 1; // (0-n) column
|
|
if ((ei_new + 1) % d_common.in2_sub_cumh_sel_rows == 0) {
|
|
row = d_common.in2_sub_cumh_sel_rows - 1;
|
|
col = col - 1;
|
|
}
|
|
|
|
// figure out corresponding location in old matrix and copy values to new
|
|
// matrix
|
|
ori_row = row + d_common.in2_sub_cumh_sel_rowlow - 1;
|
|
ori_col = col + d_common.in2_sub_cumh_sel_collow - 1;
|
|
d_unique[bx].d_in2_sub_cumh_sel[ei_new] =
|
|
d_unique[bx]
|
|
.d_in2_sub_cumh[ori_col * d_common.in2_sub_cumh_rows + ori_row];
|
|
|
|
// go for second round
|
|
ei_new = ei_new + NUMBER_THREADS;
|
|
}
|
|
|
|
//====================================================================================================
|
|
// SYNCHRONIZE THREADS
|
|
//====================================================================================================
|
|
|
|
__syncthreads();
|
|
|
|
//====================================================================================================
|
|
// SELECTION 2, SUBTRACTION
|
|
//====================================================================================================
|
|
|
|
//==================================================
|
|
// SELECTION 2
|
|
//==================================================
|
|
|
|
// work
|
|
ei_new = tx;
|
|
while (ei_new < d_common.in2_sub2_elem) {
|
|
|
|
// figure out row/col location in new matrix
|
|
row = (ei_new + 1) % d_common.in2_sub2_rows - 1; // (0-n) row
|
|
col = (ei_new + 1) / d_common.in2_sub2_rows + 1 - 1; // (0-n) column
|
|
if ((ei_new + 1) % d_common.in2_sub2_rows == 0) {
|
|
row = d_common.in2_sub2_rows - 1;
|
|
col = col - 1;
|
|
}
|
|
|
|
// figure out corresponding location in old matrix and copy values to new
|
|
// matrix
|
|
ori_row = row + d_common.in2_sub_cumh_sel2_rowlow - 1;
|
|
ori_col = col + d_common.in2_sub_cumh_sel2_collow - 1;
|
|
d_unique[bx].d_in2_sqr_sub2[ei_new] =
|
|
d_unique[bx]
|
|
.d_in2_sub_cumh[ori_col * d_common.in2_sub_cumh_rows + ori_row];
|
|
|
|
// go for second round
|
|
ei_new = ei_new + NUMBER_THREADS;
|
|
}
|
|
|
|
//==================================================
|
|
// SYNCHRONIZE THREADS
|
|
//==================================================
|
|
|
|
__syncthreads();
|
|
|
|
//==================================================
|
|
// SUBTRACTION
|
|
//==================================================
|
|
|
|
// work
|
|
ei_new = tx;
|
|
while (ei_new < d_common.in2_sub2_elem) {
|
|
|
|
// subtract
|
|
d_unique[bx].d_in2_sqr_sub2[ei_new] =
|
|
d_unique[bx].d_in2_sub_cumh_sel[ei_new] -
|
|
d_unique[bx].d_in2_sqr_sub2[ei_new];
|
|
|
|
// go for second round
|
|
ei_new = ei_new + NUMBER_THREADS;
|
|
}
|
|
|
|
//======================================================================================================================================================
|
|
// SYNCHRONIZE THREADS
|
|
//======================================================================================================================================================
|
|
|
|
__syncthreads();
|
|
|
|
//======================================================================================================================================================
|
|
// FINAL
|
|
//======================================================================================================================================================
|
|
|
|
//====================================================================================================
|
|
// DENOMINATOR A SAVE RESULT IN CUMULATIVE SUM A2
|
|
//====================================================================================================
|
|
|
|
// work
|
|
ei_new = tx;
|
|
while (ei_new < d_common.in2_sub2_elem) {
|
|
|
|
temp = d_unique[bx].d_in2_sub2[ei_new];
|
|
temp2 =
|
|
d_unique[bx].d_in2_sqr_sub2[ei_new] - (temp * temp / d_common.in_elem);
|
|
if (temp2 < 0) {
|
|
temp2 = 0;
|
|
}
|
|
d_unique[bx].d_in2_sqr_sub2[ei_new] = sqrt(temp2);
|
|
|
|
// go for second round
|
|
ei_new = ei_new + NUMBER_THREADS;
|
|
}
|
|
|
|
//====================================================================================================
|
|
// SYNCHRONIZE THREADS
|
|
//====================================================================================================
|
|
|
|
__syncthreads();
|
|
|
|
//====================================================================================================
|
|
// MULTIPLICATION
|
|
//====================================================================================================
|
|
|
|
// work
|
|
ei_new = tx;
|
|
while (ei_new < d_common.in_sqr_elem) {
|
|
|
|
temp = d_in[ei_new];
|
|
d_unique[bx].d_in_sqr[ei_new] = temp * temp;
|
|
|
|
// go for second round
|
|
ei_new = ei_new + NUMBER_THREADS;
|
|
}
|
|
|
|
//====================================================================================================
|
|
// SYNCHRONIZE THREADS
|
|
//====================================================================================================
|
|
|
|
__syncthreads();
|
|
|
|
//====================================================================================================
|
|
// IN SUM
|
|
//====================================================================================================
|
|
|
|
// work
|
|
ei_new = tx;
|
|
while (ei_new < d_common.in_cols) {
|
|
|
|
sum = 0;
|
|
for (i = 0; i < d_common.in_rows; i++) {
|
|
|
|
sum = sum + d_in[ei_new * d_common.in_rows + i];
|
|
}
|
|
in_partial_sum[ei_new] = sum;
|
|
|
|
// go for second round
|
|
ei_new = ei_new + NUMBER_THREADS;
|
|
}
|
|
|
|
//====================================================================================================
|
|
// SYNCHRONIZE THREADS
|
|
//====================================================================================================
|
|
|
|
__syncthreads();
|
|
|
|
//====================================================================================================
|
|
// IN_SQR SUM
|
|
//====================================================================================================
|
|
|
|
ei_new = tx;
|
|
while (ei_new < d_common.in_sqr_rows) {
|
|
|
|
sum = 0;
|
|
for (i = 0; i < d_common.in_sqr_cols; i++) {
|
|
|
|
sum = sum + d_unique[bx].d_in_sqr[ei_new + d_common.in_sqr_rows * i];
|
|
}
|
|
in_sqr_partial_sum[ei_new] = sum;
|
|
|
|
// go for second round
|
|
ei_new = ei_new + NUMBER_THREADS;
|
|
}
|
|
|
|
//====================================================================================================
|
|
// SYNCHRONIZE THREADS
|
|
//====================================================================================================
|
|
|
|
__syncthreads();
|
|
|
|
//====================================================================================================
|
|
// FINAL SUMMATION
|
|
//====================================================================================================
|
|
|
|
in_final_sum = 0;
|
|
for (i = 0; i < d_common.in_cols; i++) {
|
|
in_final_sum = in_final_sum + in_partial_sum[i];
|
|
}
|
|
|
|
{
|
|
|
|
in_sqr_final_sum = 0;
|
|
for (i = 0; i < d_common.in_sqr_cols; i++) {
|
|
in_sqr_final_sum = in_sqr_final_sum + in_sqr_partial_sum[i];
|
|
}
|
|
|
|
//====================================================================================================
|
|
// SYNCHRONIZE THREADS
|
|
//====================================================================================================
|
|
|
|
__syncthreads();
|
|
|
|
//====================================================================================================
|
|
// DENOMINATOR T
|
|
//====================================================================================================
|
|
|
|
mean = in_final_sum /
|
|
d_common.in_elem; // gets mean (average) value of element in ROI
|
|
mean_sqr = mean * mean;
|
|
variance = (in_sqr_final_sum / d_common.in_elem) -
|
|
mean_sqr; // gets variance of ROI
|
|
deviation = sqrt(variance); // gets standard deviation of ROI
|
|
|
|
denomT = sqrt(float(d_common.in_elem - 1)) * deviation;
|
|
|
|
//====================================================================================================
|
|
// SYNCHRONIZE THREADS
|
|
//====================================================================================================
|
|
|
|
__syncthreads();
|
|
|
|
//====================================================================================================
|
|
// DENOMINATOR SAVE RESULT IN CUMULATIVE SUM A2
|
|
//====================================================================================================
|
|
|
|
// work
|
|
ei_new = tx;
|
|
while (ei_new < d_common.in2_sub2_elem) {
|
|
|
|
d_unique[bx].d_in2_sqr_sub2[ei_new] =
|
|
d_unique[bx].d_in2_sqr_sub2[ei_new] * denomT;
|
|
|
|
// go for second round
|
|
ei_new = ei_new + NUMBER_THREADS;
|
|
}
|
|
|
|
//====================================================================================================
|
|
// SYNCHRONIZE THREADS
|
|
//====================================================================================================
|
|
|
|
__syncthreads();
|
|
|
|
//====================================================================================================
|
|
// NUMERATOR SAVE RESULT IN CONVOLUTION
|
|
//====================================================================================================
|
|
|
|
// work
|
|
ei_new = tx;
|
|
while (ei_new < d_common.conv_elem) {
|
|
|
|
d_unique[bx].d_conv[ei_new] =
|
|
d_unique[bx].d_conv[ei_new] -
|
|
d_unique[bx].d_in2_sub2[ei_new] * in_final_sum / d_common.in_elem;
|
|
|
|
// go for second round
|
|
ei_new = ei_new + NUMBER_THREADS;
|
|
}
|
|
|
|
//====================================================================================================
|
|
// SYNCHRONIZE THREADS
|
|
//====================================================================================================
|
|
|
|
__syncthreads();
|
|
|
|
//====================================================================================================
|
|
// CORRELATION SAVE RESULT IN CUMULATIVE SUM A2
|
|
//====================================================================================================
|
|
|
|
// work
|
|
ei_new = tx;
|
|
while (ei_new < d_common.in2_sub2_elem) {
|
|
|
|
d_unique[bx].d_in2_sqr_sub2[ei_new] =
|
|
d_unique[bx].d_conv[ei_new] / d_unique[bx].d_in2_sqr_sub2[ei_new];
|
|
|
|
// go for second round
|
|
ei_new = ei_new + NUMBER_THREADS;
|
|
}
|
|
|
|
//======================================================================================================================================================
|
|
// SYNCHRONIZE THREADS
|
|
//======================================================================================================================================================
|
|
|
|
__syncthreads();
|
|
|
|
//======================================================================================================================================================
|
|
// TEMPLATE MASK CREATE
|
|
//======================================================================================================================================================
|
|
|
|
cent = d_common.sSize + d_common.tSize + 1;
|
|
if (d_common_change.frame_no == 0) {
|
|
tMask_row = cent + d_unique[bx].d_Row[d_unique[bx].point_no] -
|
|
d_unique[bx].d_Row[d_unique[bx].point_no] - 1;
|
|
tMask_col = cent + d_unique[bx].d_Col[d_unique[bx].point_no] -
|
|
d_unique[bx].d_Col[d_unique[bx].point_no] - 1;
|
|
} else {
|
|
pointer = d_common_change.frame_no - 1 +
|
|
d_unique[bx].point_no * d_common.no_frames;
|
|
tMask_row = cent + d_unique[bx].d_tRowLoc[pointer] -
|
|
d_unique[bx].d_Row[d_unique[bx].point_no] - 1;
|
|
tMask_col = cent + d_unique[bx].d_tColLoc[pointer] -
|
|
d_unique[bx].d_Col[d_unique[bx].point_no] - 1;
|
|
}
|
|
|
|
// work
|
|
ei_new = tx;
|
|
while (ei_new < d_common.tMask_elem) {
|
|
|
|
location = tMask_col * d_common.tMask_rows + tMask_row;
|
|
|
|
if (ei_new == location) {
|
|
d_unique[bx].d_tMask[ei_new] = 1;
|
|
} else {
|
|
d_unique[bx].d_tMask[ei_new] = 0;
|
|
}
|
|
|
|
// go for second round
|
|
ei_new = ei_new + NUMBER_THREADS;
|
|
}
|
|
|
|
//======================================================================================================================================================
|
|
// SYNCHRONIZE THREADS
|
|
//======================================================================================================================================================
|
|
|
|
__syncthreads();
|
|
|
|
//======================================================================================================================================================
|
|
// MASK CONVOLUTION
|
|
//======================================================================================================================================================
|
|
|
|
// work
|
|
ei_new = tx;
|
|
while (ei_new < d_common.mask_conv_elem) {
|
|
|
|
// figure out row/col location in array
|
|
ic = (ei_new + 1) % d_common.mask_conv_rows; // (1-n)
|
|
jc = (ei_new + 1) / d_common.mask_conv_rows + 1; // (1-n)
|
|
if ((ei_new + 1) % d_common.mask_conv_rows == 0) {
|
|
ic = d_common.mask_conv_rows;
|
|
jc = jc - 1;
|
|
}
|
|
|
|
//
|
|
j = jc + d_common.mask_conv_joffset;
|
|
jp1 = j + 1;
|
|
if (d_common.mask_cols < jp1) {
|
|
ja1 = jp1 - d_common.mask_cols;
|
|
} else {
|
|
ja1 = 1;
|
|
}
|
|
if (d_common.tMask_cols < j) {
|
|
ja2 = d_common.tMask_cols;
|
|
} else {
|
|
ja2 = j;
|
|
}
|
|
|
|
i = ic + d_common.mask_conv_ioffset;
|
|
ip1 = i + 1;
|
|
|
|
if (d_common.mask_rows < ip1) {
|
|
ia1 = ip1 - d_common.mask_rows;
|
|
} else {
|
|
ia1 = 1;
|
|
}
|
|
if (d_common.tMask_rows < i) {
|
|
ia2 = d_common.tMask_rows;
|
|
} else {
|
|
ia2 = i;
|
|
}
|
|
|
|
s = 0;
|
|
|
|
for (ja = ja1; ja <= ja2; ja++) {
|
|
jb = jp1 - ja;
|
|
for (ia = ia1; ia <= ia2; ia++) {
|
|
ib = ip1 - ia;
|
|
s = s +
|
|
d_unique[bx].d_tMask[d_common.tMask_rows * (ja - 1) + ia - 1] * 1;
|
|
}
|
|
}
|
|
|
|
// //d_unique[bx].d_mask_conv[d_common.mask_conv_rows*(jc-1)+ic-1] = s;
|
|
d_unique[bx].d_mask_conv[ei_new] =
|
|
d_unique[bx].d_in2_sqr_sub2[ei_new] * s;
|
|
|
|
// go for second round
|
|
ei_new = ei_new + NUMBER_THREADS;
|
|
}
|
|
|
|
//======================================================================================================================================================
|
|
// SYNCHRONIZE THREADS
|
|
//======================================================================================================================================================
|
|
|
|
__syncthreads();
|
|
|
|
//======================================================================================================================================================
|
|
// MAXIMUM VALUE
|
|
//======================================================================================================================================================
|
|
|
|
//====================================================================================================
|
|
// INITIAL SEARCH
|
|
//====================================================================================================
|
|
|
|
ei_new = tx;
|
|
while (ei_new < d_common.mask_conv_rows) {
|
|
|
|
for (i = 0; i < d_common.mask_conv_cols; i++) {
|
|
largest_coordinate_current = ei_new * d_common.mask_conv_rows + i;
|
|
largest_value_current =
|
|
abs(d_unique[bx].d_mask_conv[largest_coordinate_current]);
|
|
if (largest_value_current > largest_value) {
|
|
largest_coordinate = largest_coordinate_current;
|
|
largest_value = largest_value_current;
|
|
}
|
|
}
|
|
par_max_coo[ei_new] = largest_coordinate;
|
|
par_max_val[ei_new] = largest_value;
|
|
|
|
// go for second round
|
|
ei_new = ei_new + NUMBER_THREADS;
|
|
}
|
|
|
|
//====================================================================================================
|
|
// SYNCHRONIZE THREADS
|
|
//====================================================================================================
|
|
|
|
__syncthreads();
|
|
|
|
//====================================================================================================
|
|
// FINAL SEARCH
|
|
//====================================================================================================
|
|
|
|
for (i = 0; i < d_common.mask_conv_rows; i++) {
|
|
if (par_max_val[i] > fin_max_val) {
|
|
fin_max_val = par_max_val[i];
|
|
fin_max_coo = par_max_coo[i];
|
|
}
|
|
}
|
|
|
|
// convert coordinate to row/col form
|
|
largest_row = (fin_max_coo + 1) % d_common.mask_conv_rows - 1; // (0-n) row
|
|
largest_col = (fin_max_coo + 1) / d_common.mask_conv_rows; // (0-n) column
|
|
if ((fin_max_coo + 1) % d_common.mask_conv_rows == 0) {
|
|
largest_row = d_common.mask_conv_rows - 1;
|
|
largest_col = largest_col - 1;
|
|
}
|
|
|
|
// calculate offset
|
|
largest_row = largest_row + 1; // compensate to match MATLAB format (1-n)
|
|
largest_col = largest_col + 1; // compensate to match MATLAB format (1-n)
|
|
offset_row =
|
|
largest_row - d_common.in_rows - (d_common.sSize - d_common.tSize);
|
|
offset_col =
|
|
largest_col - d_common.in_cols - (d_common.sSize - d_common.tSize);
|
|
pointer =
|
|
d_common_change.frame_no + d_unique[bx].point_no * d_common.no_frames;
|
|
d_unique[bx].d_tRowLoc[pointer] =
|
|
d_unique[bx].d_Row[d_unique[bx].point_no] + offset_row;
|
|
d_unique[bx].d_tColLoc[pointer] =
|
|
d_unique[bx].d_Col[d_unique[bx].point_no] + offset_col;
|
|
|
|
//======================================================================================================================================================
|
|
// SYNCHRONIZE THREADS
|
|
//======================================================================================================================================================
|
|
|
|
__syncthreads();
|
|
}
|