26 static void XDerivative(
float *Dest,
float *ConvTemp,
const float *Src,
27 int Width,
int Height)
33 for(y = 0, i = 0, iEnd = -1; y < Height; y++)
35 ConvTemp[i] = Src[i + 1] - Src[i];
38 for(iEnd += Width; i < iEnd; i++)
39 ConvTemp[i] = Src[i + 1] - Src[i - 1];
41 ConvTemp[i] = Src[i] - Src[i - 1];
45 for(y = 0, i = iEnd = 0; y < Height; y++)
47 il = (y > 0) ? -Width : 0;
48 ir = (y < Height - 1) ? Width : 0;
50 for(iEnd += Width; i < iEnd; i++)
51 Dest[i] = 0.3125f*ConvTemp[i]
52 + 0.09375f*(ConvTemp[i + ir] + ConvTemp[i + il]);
58 static void YDerivative(
float *Dest,
float *ConvTemp,
const float *Src,
59 int Width,
int Height)
61 int i, il, ir, iEnd, y;
64 for(y = 0, i = iEnd = 0; y < Height; y++)
66 il = (y > 0) ? -Width : 0;
67 ir = (y < Height - 1) ? Width : 0;
69 for(iEnd += Width; i < iEnd; i++)
70 ConvTemp[i] = Src[i + ir] - Src[i + il];
73 for(y = 0, i = 0, iEnd = -1; y < Height; y++)
75 Dest[i] = 0.40625f*ConvTemp[i] + 0.09375f*ConvTemp[i + 1];
78 for(iEnd += Width; i < iEnd; i++)
79 Dest[i] = 0.3125f*ConvTemp[i]
80 + 0.09375f*(ConvTemp[i + 1] + ConvTemp[i - 1]);
82 Dest[i] = 0.40625f*ConvTemp[i] + 0.09375f*ConvTemp[i - 1];
89 static void ComputeTensor(
float *Txx,
float *Txy,
float *Tyy,
90 float *uSmooth,
float *ConvTemp,
const float *u,
int Width,
int Height,
94 const double KSquared = K*K;
95 const int NumPixels = Width*Height;
96 const int NumEl = 3*NumPixels;
98 double Tm, SqrtTm, Trace, Lambda1, Lambda2, EigVecX, EigVecY, Temp;
100 int i, iu, id, il, ir, x, y, Channel;
107 #pragma omp parallel sections private(i)
111 for(i = 0; i < NumPixels; i++)
116 for(i = 0; i < NumPixels; i++)
121 for(i = 0; i < NumPixels; i++)
128 PreSmooth, PreSmooth, Boundary, Width, Height, 1);
133 ConvTemp + NumPixels, u + NumPixels,
134 PreSmooth, PreSmooth, Boundary, Width, Height, 1);
139 ConvTemp + 2*NumPixels, u + 2*NumPixels,
140 PreSmooth, PreSmooth, Boundary, Width, Height, 1);
144 for(i = 0; i < NumPixels; i++)
147 for(i = 0; i < NumPixels; i++)
150 for(i = 0; i < NumPixels; i++)
154 PreSmooth, PreSmooth, Boundary, Width, Height, 3);
158 for(Channel = 0; Channel < NumEl; Channel += NumPixels)
160 for(y = 0, i = 0; y < Height; y++)
162 iu = (y > 0) ? -Width : 0;
163 id = (y < Height - 1) ? Width : 0;
165 for(x = 0; x < Width; x++, i++)
167 il = (x > 0) ? -1 : 0;
168 ir = (x < Width - 1) ? 1 : 0;
170 ux = (uSmooth[i + ir + Channel]
171 - uSmooth[i + il + Channel]) / 2;
172 uy = (uSmooth[i +
id + Channel]
173 - uSmooth[i + iu + Channel]) / 2;
183 #pragma omp parallel sections
188 PostSmooth, PostSmooth, Boundary, Width, Height, 1);
193 PostSmooth, PostSmooth, Boundary, Width, Height, 1);
198 PostSmooth, PostSmooth, Boundary, Width, Height, 1);
203 PostSmooth, PostSmooth, Boundary, Width, Height, 1);
205 PostSmooth, PostSmooth, Boundary, Width, Height, 1);
207 PostSmooth, PostSmooth, Boundary, Width, Height, 1);
211 for(i = 0; i < NumPixels; i++)
214 Trace = 0.5*(Txx[i] + Tyy[i]);
215 Temp = sqrt(Trace*Trace - Txx[i]*Tyy[i] + Txy[i]*Txy[i]);
216 Lambda1 = Trace - Temp;
217 Lambda2 = Trace + Temp;
219 EigVecY = Lambda1 - Txx[i];
220 Temp = sqrt(EigVecX*EigVecX + EigVecY*EigVecY);
226 Tm = KSquared/(KSquared + (Lambda1 + Lambda2));
230 Txx[i] = (float)(dt*(SqrtTm*EigVecX*EigVecX + Tm*EigVecY*EigVecY));
231 Txy[i] = (float)(dt*((SqrtTm - Tm)*EigVecX*EigVecY));
232 Tyy[i] = (float)(dt*(SqrtTm*EigVecY*EigVecY + Tm*EigVecX*EigVecX));
245 static void DiffuseWithTensor(
float *u,
float *vx,
float *vy,
246 float *SumX,
float *SumY,
247 float *ConvTemp,
const float *Txx,
const float *Txy,
const float *Tyy,
248 int Width,
int Height,
int DiffIter)
250 const int NumPixels = Width*Height;
251 int i, Channel, Step;
254 for(Channel = 0; Channel < 3; Channel++, u += NumPixels)
256 for(Step = 0; Step < DiffIter; Step++)
265 XDerivative(vx, ConvTemp, u, Width, Height);
269 YDerivative(vy, ConvTemp + NumPixels, u, Width, Height);
273 #pragma omp for schedule(static)
274 for(i = 0; i < NumPixels; i++)
276 SumX[i] = Txx[i]*vx[i] + Txy[i]*vy[i];
277 SumY[i] = Txy[i]*vx[i] + Tyy[i]*vy[i];
284 XDerivative(SumX, ConvTemp,
285 SumX, Width, Height);
289 YDerivative(SumY, ConvTemp + NumPixels,
290 SumY, Width, Height);
294 #pragma omp for schedule(static)
295 for(i = 0; i < NumPixels; i++)
296 u[i] += SumX[i] + SumY[i];
299 XDerivative(vx, ConvTemp, u, Width, Height);
300 YDerivative(vy, ConvTemp, u, Width, Height);
302 for(i = 0; i < NumPixels; i++)
304 SumX[i] = Txx[i]*vx[i] + Txy[i]*vy[i];
305 SumY[i] = Txy[i]*vx[i] + Tyy[i]*vy[i];
308 XDerivative(SumX, ConvTemp, SumX, Width, Height);
309 YDerivative(SumY, ConvTemp, SumY, Width, Height);
311 for(i = 0; i < NumPixels; i++)
312 u[i] += SumX[i] + SumY[i];
320 static void SumAliases(
float *u,
int d,
int Width,
int Height)
322 const int BlockWidth = Width / d;
323 const int BlockHeight = Height / d;
324 const int StripSize = Width*BlockHeight;
330 for(y = 0; y < Height; y++)
334 for(k = 1; k < d; k++)
336 Src = Dest + k*BlockWidth;
338 for(x = 0; x < BlockWidth; x++)
344 for(k = 1; k < d; k++)
347 Src = u + Width*(k*BlockHeight);
349 for(y = 0; y < BlockHeight; y++)
351 for(x = 0; x < BlockWidth; x++)
360 for(y = 0, Src = Dest = u; y < Height; y++, Src += Width)
362 for(k = 1, Dest += BlockWidth; k < d; k++, Dest += BlockWidth)
364 memcpy(Dest, Src,
sizeof(
float)*BlockWidth);
369 for(k = 1, Dest = u + StripSize; k < d; k++, Dest += StripSize)
370 memcpy(Dest, u,
sizeof(
float)*StripSize);
375 static void MakePhi(
float *Phi,
double PsfSigma,
int d,
int Width,
int Height)
378 const int NumOverlaps = 2;
379 const int NumPixels = Width*Height;
381 float Sum, Sigma, Denom;
387 Temp = (
float *)
Malloc(
sizeof(
float)*NumPixels);
396 for(i = 0; i < NumPixels; i++)
401 Sigma = (float)(Width/(2*
M_PI*d*PsfSigma));
402 Denom = 2*Sigma*Sigma;
403 PhiLastRow = Phi + Width*(Height - 1);
406 for(x = 0; x < Width; x++)
408 for(k = -NumOverlaps, Sum = 0; k < NumOverlaps; k++)
411 Sum += (float)exp(-(t*t)/Denom);
417 Sigma = (float)(Height/(2*
M_PI*d*PsfSigma));
418 Denom = 2*Sigma*Sigma;
421 for(y = 0, i = 0; y < Height; y++, i += Width)
423 for(k = -NumOverlaps, Sum = 0; k < NumOverlaps; k++)
426 Sum += (float)exp(-(t*t)/Denom);
430 for(x = 0; x < Width; x++)
431 Phi[x + i] = Sum*PhiLastRow[x];
436 for(i = 0; i < NumPixels; i++)
437 Temp[i] = Phi[i]*Phi[i];
439 SumAliases(Temp, d, Width, Height);
442 for(i = 0; i < NumPixels; i++)
443 Phi[i] /= (
float)sqrt(Temp[i] * NumPixels);
450 static void ImageSumAliases(
float *u,
int d,
int Width,
int Height,
453 const int BlockWidth = Width / d;
454 const int BlockWidth2 = 2*BlockWidth;
455 const int BlockHeight = Height / d;
456 const int StripSize = 2*Width*BlockHeight;
457 float *uChannel, *Src, *Dest;
458 int k, i, y, Channel;
461 for(Channel = 0; Channel < NumChannels; Channel++)
463 uChannel = u + 2*Width*Height*Channel;
466 for(y = 0; y < Height; y++)
468 Dest = uChannel + 2*Width*y;
470 for(k = 1; k < d; k++)
472 Src = Dest + 2*BlockWidth*k;
474 for(i = 0; i < BlockWidth2; i++)
480 for(k = 1; k < d; k++)
483 Src = uChannel + 2*Width*(BlockHeight*k);
485 for(y = 0; y < BlockHeight; y++)
487 for(i = 0; i < BlockWidth2; i++)
496 for(y = 0, Src = Dest = uChannel; y < Height; y++, Src += 2*Width)
498 for(k = 1, Dest += BlockWidth2; k < d; k++, Dest += BlockWidth2)
500 memcpy(Dest, Src,
sizeof(
float)*BlockWidth2);
505 for(k = 1, Dest = uChannel + StripSize; k < d; k++, Dest += StripSize)
506 memcpy(Dest, uChannel,
sizeof(
float)*StripSize);
512 static void MultiplyByPhiInterleaved(
float *Dest,
const float *Phi,
513 int NumPixels,
int NumChannels)
518 for(Channel = 0; Channel < NumChannels; Channel++, Dest += 2*NumPixels)
520 for(i = 0; i < NumPixels; i++)
523 Dest[2*i + 1] *= Phi[i];
530 static void MirrorSpectra(
float *Dest,
int Width,
int Height,
int NumChannels)
532 const int H = Width/2 + 1;
533 int i, sx, sy, x, y, Channel;
537 for(Channel = 0, i = 0; Channel < NumChannels; Channel++)
539 for(y = 0; y < Height; y++)
541 sy = (y > 0) ? Height - y : 0;
542 sy = 2*Width*(sy + Height*Channel);
544 for(x = H, i += 2*H; x < Width; x++, i += 2)
548 Dest[i] = Dest[sx + sy];
549 Dest[i + 1] = -Dest[sx + sy + 1];
557 static void Project(
float *u,
float *Temp,
float *ConvTemp,
558 fftwf_plan ForwardPlan, fftwf_plan InversePlan,
const float *u0,
559 const float *Phi,
int ScaleFactor,
560 int OutputWidth,
int OutputHeight,
int Padding)
562 const int OutputNumPixels = OutputWidth*OutputHeight;
563 const int OutputNumEl = 3*OutputNumPixels;
564 int TransWidth, TransHeight, TransNumPixels;
565 int i, x, y, sx, sy, OffsetX, OffsetY, Channel;
568 TransWidth = OutputWidth + 2*Padding*ScaleFactor;
569 TransHeight = OutputHeight + 2*Padding*ScaleFactor;
571 if(TransWidth > 2*OutputWidth)
572 TransWidth = 2*OutputWidth;
573 if(TransHeight > 2*OutputHeight)
574 TransHeight = 2*OutputHeight;
576 TransNumPixels = TransWidth*TransHeight;
577 OffsetX = (TransWidth - OutputWidth)/2;
578 OffsetY = (TransHeight - OutputHeight)/2;
579 OffsetX -= OffsetX % ScaleFactor;
580 OffsetY -= OffsetY % ScaleFactor;
582 for(Channel = 0, i = 0; Channel < OutputNumEl; Channel += OutputNumPixels)
584 for(y = 0; y < TransHeight; y++)
592 else if(sy >= OutputHeight)
593 sy = 2*OutputHeight - 1 - sy;
598 for(x = 0; x < TransWidth; x++, i++)
606 else if(sx >= OutputWidth)
607 sx = 2*OutputWidth - 1 - sx;
612 Temp[i] = u[sx + OutputWidth*sy + Channel]
613 - u0[sx + OutputWidth*sy + Channel];
618 fftwf_execute(ForwardPlan);
619 MirrorSpectra(ConvTemp, TransWidth, TransHeight, 3);
621 MultiplyByPhiInterleaved(ConvTemp, Phi, TransNumPixels, 3);
622 ImageSumAliases(ConvTemp, ScaleFactor, TransWidth, TransHeight, 3);
623 MultiplyByPhiInterleaved(ConvTemp, Phi, TransNumPixels, 3);
625 fftwf_execute(InversePlan);
628 Temp += OffsetX + TransWidth*OffsetY;
630 for(Channel = 0, i = 0; Channel < 3; Channel++)
632 for(y = 0; y < OutputHeight; y++)
634 for(x = 0; x < OutputWidth; x++, i++)
636 u[i] -= Temp[x + TransWidth*y];
640 Temp += TransNumPixels;
645 static float ComputeDiff(
const float *u,
const float *uLast,
648 float Temp, Diff = 0;
651 for(i = 0; i < OutputNumEl; i++)
653 Temp = u[i] - uLast[i];
657 return sqrt(Diff/OutputNumEl);
695 const float *Input,
int InputWidth,
int InputHeight,
double PsfSigma,
696 double K,
float Tol,
int MaxMethodIter,
int DiffIter)
698 const int Padding = 5;
699 const int OutputNumPixels = OutputWidth*OutputHeight;
700 const int OutputNumEl = 3*OutputNumPixels;
701 float *u0 = NULL, *Txx = NULL, *Txy = NULL, *Tyy = NULL, *Temp = NULL,
702 *ConvTemp = NULL, *vx = NULL, *vy = NULL, *Phi = NULL, *uLast = NULL;
703 fftwf_plan ForwardPlan = 0, InversePlan = 0;
705 fftw_iodim HowManyDims[1];
706 filter PreSmooth = {NULL, 0, 0}, PostSmooth = {NULL, 0, 0};
707 float PreSmoothSigma, PostSmoothSigma, Diff;
708 unsigned long StartTime, StopTime;
709 int TransWidth, TransHeight, TransNumPixels;
710 int Iter, ScaleFactor, Success = 0;
713 ScaleFactor = OutputWidth / InputWidth;
714 PreSmoothSigma = 0.3f * ScaleFactor;
715 PostSmoothSigma = 0.4f * ScaleFactor;
717 TransWidth = OutputWidth + 2*Padding*ScaleFactor;
718 TransHeight = OutputHeight + 2*Padding*ScaleFactor;
720 if(TransWidth > 2*OutputWidth)
721 TransWidth = 2*OutputWidth;
722 if(TransHeight > 2*OutputHeight)
723 TransHeight = 2*OutputHeight;
725 TransNumPixels = TransWidth*TransHeight;
727 printf(
"Initial interpolation\n");
732 else if(MaxMethodIter <= 0)
740 || OutputWidth != ScaleFactor*InputWidth
741 || OutputHeight != ScaleFactor*InputHeight
742 || !(ConvTemp = (
float *)fftwf_malloc(
sizeof(
float)*24*OutputNumPixels))
743 || !(Temp = (
float *)fftwf_malloc(
sizeof(
float)*12*OutputNumPixels))
744 || !(Phi = (
float *)
Malloc(
sizeof(
float)*4*OutputNumPixels))
745 || !(u0 = (
float *)
Malloc(
sizeof(
float)*3*OutputNumPixels))
746 || !(uLast = (
float *)
Malloc(
sizeof(
float)*3*OutputNumPixels))
747 || !(Txx = (
float *)
Malloc(
sizeof(
float)*OutputNumPixels))
748 || !(Txy = (
float *)
Malloc(
sizeof(
float)*OutputNumPixels))
749 || !(Tyy = (
float *)
Malloc(
sizeof(
float)*OutputNumPixels))
750 || !(vx = (
float *)
Malloc(
sizeof(
float)*OutputNumPixels))
751 || !(vy = (
float *)
Malloc(
sizeof(
float)*OutputNumPixels))
753 (
int)ceil(2.5*PreSmoothSigma)))
755 (
int)ceil(2.5*PostSmoothSigma))))
761 HowManyDims[0].n = 3;
762 HowManyDims[0].is = TransNumPixels;
763 HowManyDims[0].os = TransNumPixels;
765 Dims[0].n = TransHeight;
766 Dims[0].is = TransWidth;
767 Dims[0].os = TransWidth;
768 Dims[1].n = TransWidth;
778 if(!(ForwardPlan = fftwf_plan_guru_dft_r2c(2, Dims, 1, HowManyDims,
779 Temp, (fftwf_complex *)ConvTemp, FFTW_ESTIMATE | FFTW_DESTROY_INPUT))
780 || !(InversePlan = fftwf_plan_guru_dft_c2r(2, Dims, 1, HowManyDims,
781 (fftwf_complex *)ConvTemp, Temp, FFTW_ESTIMATE | FFTW_DESTROY_INPUT)))
784 printf(
"Roussos-Maragos interpolation\n");
787 MakePhi(Phi, PsfSigma, ScaleFactor, TransWidth, TransHeight);
788 memcpy(u0, u,
sizeof(
float)*3*OutputNumPixels);
791 for(Iter = 1; Iter <= MaxMethodIter; Iter++)
793 memcpy(uLast, u,
sizeof(
float)*OutputNumEl);
795 ComputeTensor(Txx, Txy, Tyy, Temp, ConvTemp, u,
796 OutputWidth, OutputHeight, PreSmooth, PostSmooth, K);
798 DiffuseWithTensor(u, vx, vy, Temp, Temp + OutputNumPixels, ConvTemp,
799 Txx, Txy, Tyy, OutputWidth, OutputHeight, DiffIter);
801 Project(u, Temp, ConvTemp, ForwardPlan, InversePlan, u0, Phi,
802 ScaleFactor, OutputWidth, OutputHeight, Padding);
804 Diff = ComputeDiff(u, uLast, OutputNumEl);
806 if(Iter >= 2 && Diff <= Tol)
808 printf(
"Converged in %d iterations.\n", Iter);
816 printf(
"Maximum number of iterations exceeded.\n");
818 printf(
"CPU Time: %.3f s\n\n", 0.001*(StopTime - StartTime));
821 fftwf_destroy_plan(InversePlan);
822 fftwf_destroy_plan(ForwardPlan);
838 fftwf_free(ConvTemp);