Roussos-Maragos Tensor-Driven Diffusion Interpolation
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros
tdinterp.c
Go to the documentation of this file.
1 
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #include <math.h>
17 #include <fftw3.h>
18 
19 #include "basic.h"
20 #include "finterp.h"
21 #include "tdinterp.h"
22 #include "conv.h"
23 
24 
26 static void XDerivative(float *Dest, float *ConvTemp, const float *Src,
27  int Width, int Height)
28 {
29  int i, il, ir, y;
30  int iEnd;
31 
32 
33  for(y = 0, i = 0, iEnd = -1; y < Height; y++)
34  {
35  ConvTemp[i] = Src[i + 1] - Src[i];
36  i++;
37 
38  for(iEnd += Width; i < iEnd; i++)
39  ConvTemp[i] = Src[i + 1] - Src[i - 1];
40 
41  ConvTemp[i] = Src[i] - Src[i - 1];
42  i++;
43  }
44 
45  for(y = 0, i = iEnd = 0; y < Height; y++)
46  {
47  il = (y > 0) ? -Width : 0;
48  ir = (y < Height - 1) ? Width : 0;
49 
50  for(iEnd += Width; i < iEnd; i++)
51  Dest[i] = 0.3125f*ConvTemp[i]
52  + 0.09375f*(ConvTemp[i + ir] + ConvTemp[i + il]);
53  }
54 }
55 
56 
58 static void YDerivative(float *Dest, float *ConvTemp, const float *Src,
59  int Width, int Height)
60 {
61  int i, il, ir, iEnd, y;
62 
63 
64  for(y = 0, i = iEnd = 0; y < Height; y++)
65  {
66  il = (y > 0) ? -Width : 0;
67  ir = (y < Height - 1) ? Width : 0;
68 
69  for(iEnd += Width; i < iEnd; i++)
70  ConvTemp[i] = Src[i + ir] - Src[i + il];
71  }
72 
73  for(y = 0, i = 0, iEnd = -1; y < Height; y++)
74  {
75  Dest[i] = 0.40625f*ConvTemp[i] + 0.09375f*ConvTemp[i + 1];
76  i++;
77 
78  for(iEnd += Width; i < iEnd; i++)
79  Dest[i] = 0.3125f*ConvTemp[i]
80  + 0.09375f*(ConvTemp[i + 1] + ConvTemp[i - 1]);
81 
82  Dest[i] = 0.40625f*ConvTemp[i] + 0.09375f*ConvTemp[i - 1];
83  i++;
84  }
85 }
86 
87 
89 static void ComputeTensor(float *Txx, float *Txy, float *Tyy,
90  float *uSmooth, float *ConvTemp, const float *u, int Width, int Height,
91  filter PreSmooth, filter PostSmooth, double K)
92 {
93  const float dt = 2;
94  const double KSquared = K*K;
95  const int NumPixels = Width*Height;
96  const int NumEl = 3*NumPixels;
97  boundaryext Boundary = GetBoundaryExt("sym");
98  double Tm, SqrtTm, Trace, Lambda1, Lambda2, EigVecX, EigVecY, Temp;
99  float ux, uy;
100  int i, iu, id, il, ir, x, y, Channel;
101 
102 
103  /* Set the tensor to zero and perform pre-smoothing on u. Note that it is
104  not safely portable to use memset for this purpose.
105  http://c-faq.com/malloc/calloc.html */
106 #ifdef _OPENMP
107  #pragma omp parallel sections private(i)
108  { /* The following six operations can run in parallel */
109  #pragma omp section
110  {
111  for(i = 0; i < NumPixels; i++)
112  Txx[i] = 0.0f;
113  }
114  #pragma omp section
115  {
116  for(i = 0; i < NumPixels; i++)
117  Txy[i] = 0.0f;
118  }
119  #pragma omp section
120  {
121  for(i = 0; i < NumPixels; i++)
122  Tyy[i] = 0.0f;
123  }
124  #pragma omp section
125  {
126  SeparableConv2D(uSmooth,
127  ConvTemp, u,
128  PreSmooth, PreSmooth, Boundary, Width, Height, 1);
129  }
130  #pragma omp section
131  {
132  SeparableConv2D(uSmooth + NumPixels,
133  ConvTemp + NumPixels, u + NumPixels,
134  PreSmooth, PreSmooth, Boundary, Width, Height, 1);
135  }
136  #pragma omp section
137  {
138  SeparableConv2D(uSmooth + 2*NumPixels,
139  ConvTemp + 2*NumPixels, u + 2*NumPixels,
140  PreSmooth, PreSmooth, Boundary, Width, Height, 1);
141  }
142  }
143 #else
144  for(i = 0; i < NumPixels; i++)
145  Txx[i] = 0.0f;
146 
147  for(i = 0; i < NumPixels; i++)
148  Txy[i] = 0.0f;
149 
150  for(i = 0; i < NumPixels; i++)
151  Tyy[i] = 0.0f;
152 
153  SeparableConv2D(uSmooth, ConvTemp, u,
154  PreSmooth, PreSmooth, Boundary, Width, Height, 3);
155 #endif
156 
157  /* Compute the structure tensor */
158  for(Channel = 0; Channel < NumEl; Channel += NumPixels)
159  {
160  for(y = 0, i = 0; y < Height; y++)
161  {
162  iu = (y > 0) ? -Width : 0;
163  id = (y < Height - 1) ? Width : 0;
164 
165  for(x = 0; x < Width; x++, i++)
166  {
167  il = (x > 0) ? -1 : 0;
168  ir = (x < Width - 1) ? 1 : 0;
169 
170  ux = (uSmooth[i + ir + Channel]
171  - uSmooth[i + il + Channel]) / 2;
172  uy = (uSmooth[i + id + Channel]
173  - uSmooth[i + iu + Channel]) / 2;
174  Txx[i] += ux * ux;
175  Txy[i] += ux * uy;
176  Tyy[i] += uy * uy;
177  }
178  }
179  }
180 
181  /* Perform the post-smoothing convolution with PostSmooth */
182 #ifdef _OPENMP
183  #pragma omp parallel sections
184  {
185  #pragma omp section
186  {
187  SeparableConv2D(Txx, ConvTemp, Txx,
188  PostSmooth, PostSmooth, Boundary, Width, Height, 1);
189  }
190  #pragma omp section
191  {
192  SeparableConv2D(Txy, ConvTemp + NumPixels, Txy,
193  PostSmooth, PostSmooth, Boundary, Width, Height, 1);
194  }
195  #pragma omp section
196  {
197  SeparableConv2D(Tyy, ConvTemp + 2*NumPixels, Tyy,
198  PostSmooth, PostSmooth, Boundary, Width, Height, 1);
199  }
200  }
201 #else
202  SeparableConv2D(Txx, ConvTemp, Txx,
203  PostSmooth, PostSmooth, Boundary, Width, Height, 1);
204  SeparableConv2D(Txy, ConvTemp, Txy,
205  PostSmooth, PostSmooth, Boundary, Width, Height, 1);
206  SeparableConv2D(Tyy, ConvTemp, Tyy,
207  PostSmooth, PostSmooth, Boundary, Width, Height, 1);
208 #endif
209 
210  /* Refactor the structure tensor */
211  for(i = 0; i < NumPixels; i++)
212  {
213  /* Compute the eigenspectra */
214  Trace = 0.5*(Txx[i] + Tyy[i]);
215  Temp = sqrt(Trace*Trace - Txx[i]*Tyy[i] + Txy[i]*Txy[i]);
216  Lambda1 = Trace - Temp;
217  Lambda2 = Trace + Temp;
218  EigVecX = Txy[i];
219  EigVecY = Lambda1 - Txx[i];
220  Temp = sqrt(EigVecX*EigVecX + EigVecY*EigVecY);
221 
222  if(Temp >= 1e-9)
223  {
224  EigVecX /= Temp;
225  EigVecY /= Temp;
226  Tm = KSquared/(KSquared + (Lambda1 + Lambda2));
227  SqrtTm = sqrt(Tm);
228 
229  /* Construct new tensor from the spectra */
230  Txx[i] = (float)(dt*(SqrtTm*EigVecX*EigVecX + Tm*EigVecY*EigVecY));
231  Txy[i] = (float)(dt*((SqrtTm - Tm)*EigVecX*EigVecY));
232  Tyy[i] = (float)(dt*(SqrtTm*EigVecY*EigVecY + Tm*EigVecX*EigVecX));
233  }
234  else
235  {
236  Txx[i] = dt;
237  Txy[i] = 0.0f;
238  Tyy[i] = dt;
239  }
240  }
241 }
242 
243 
245 static void DiffuseWithTensor(float *u, float *vx, float *vy,
246  float *SumX, float *SumY,
247  float *ConvTemp, const float *Txx, const float *Txy, const float *Tyy,
248  int Width, int Height, int DiffIter)
249 {
250  const int NumPixels = Width*Height;
251  int i, Channel, Step;
252 
253 
254  for(Channel = 0; Channel < 3; Channel++, u += NumPixels)
255  {
256  for(Step = 0; Step < DiffIter; Step++)
257  {
258 #ifdef _OPENMP
259  #pragma omp parallel
260  {
261  #pragma omp sections
262  { /* XDerivative and YDerivative can run in parallel */
263  #pragma omp section
264  {
265  XDerivative(vx, ConvTemp, u, Width, Height);
266  }
267  #pragma omp section
268  {
269  YDerivative(vy, ConvTemp + NumPixels, u, Width, Height);
270  }
271  }
272 
273  #pragma omp for schedule(static)
274  for(i = 0; i < NumPixels; i++)
275  {
276  SumX[i] = Txx[i]*vx[i] + Txy[i]*vy[i];
277  SumY[i] = Txy[i]*vx[i] + Tyy[i]*vy[i];
278  }
279 
280  #pragma omp sections
281  { /* XDerivative and YDerivative can run in parallel */
282  #pragma omp section
283  {
284  XDerivative(SumX, ConvTemp,
285  SumX, Width, Height);
286  }
287  #pragma omp section
288  {
289  YDerivative(SumY, ConvTemp + NumPixels,
290  SumY, Width, Height);
291  }
292  }
293 
294  #pragma omp for schedule(static)
295  for(i = 0; i < NumPixels; i++)
296  u[i] += SumX[i] + SumY[i];
297  }
298 #else
299  XDerivative(vx, ConvTemp, u, Width, Height);
300  YDerivative(vy, ConvTemp, u, Width, Height);
301 
302  for(i = 0; i < NumPixels; i++)
303  {
304  SumX[i] = Txx[i]*vx[i] + Txy[i]*vy[i];
305  SumY[i] = Txy[i]*vx[i] + Tyy[i]*vy[i];
306  }
307 
308  XDerivative(SumX, ConvTemp, SumX, Width, Height);
309  YDerivative(SumY, ConvTemp, SumY, Width, Height);
310 
311  for(i = 0; i < NumPixels; i++)
312  u[i] += SumX[i] + SumY[i];
313 #endif
314  }
315  }
316 }
317 
318 
320 static void SumAliases(float *u, int d, int Width, int Height)
321 {
322  const int BlockWidth = Width / d;
323  const int BlockHeight = Height / d;
324  const int StripSize = Width*BlockHeight;
325  float *Src, *Dest;
326  int k, x, y;
327 
328 
329  /* Sum along x for every y */
330  for(y = 0; y < Height; y++)
331  {
332  Dest = u + y*Width;
333 
334  for(k = 1; k < d; k++)
335  {
336  Src = Dest + k*BlockWidth;
337 
338  for(x = 0; x < BlockWidth; x++)
339  Dest[x] += Src[x];
340  }
341  }
342 
343  /* Sum along y for 0 <= x < BlockWidth */
344  for(k = 1; k < d; k++)
345  {
346  Dest = u;
347  Src = u + Width*(k*BlockHeight);
348 
349  for(y = 0; y < BlockHeight; y++)
350  {
351  for(x = 0; x < BlockWidth; x++)
352  Dest[x] += Src[x];
353 
354  Dest += Width;
355  Src += Width;
356  }
357  }
358 
359  /* Copy block [0, BlockWidth) x [0, BlockHeight) in the x direction*/
360  for(y = 0, Src = Dest = u; y < Height; y++, Src += Width)
361  {
362  for(k = 1, Dest += BlockWidth; k < d; k++, Dest += BlockWidth)
363  {
364  memcpy(Dest, Src, sizeof(float)*BlockWidth);
365  }
366  }
367 
368  /* Copy strip [0, OutputWidth) x [0, InputHeight) in the y direction*/
369  for(k = 1, Dest = u + StripSize; k < d; k++, Dest += StripSize)
370  memcpy(Dest, u, sizeof(float)*StripSize);
371 }
372 
373 
375 static void MakePhi(float *Phi, double PsfSigma, int d, int Width, int Height)
376 {
377  /* Number of terms to use in truncated sum, larger is more accurate */
378  const int NumOverlaps = 2;
379  const int NumPixels = Width*Height;
380  float *PhiLastRow;
381  float Sum, Sigma, Denom;
382  int i, x, y, t, k;
383 
384  float *Temp;
385 
386 
387  Temp = (float *)Malloc(sizeof(float)*NumPixels);
388 
389  /* Construct the Fourier transform of a Gaussian with spatial standard
390  * deviation d*PsfSigma. Using the Gaussian and the transform's
391  * separability, the result is formed as the tensor product of the 1D
392  * transforms. This significantly reduces the number of exp calls.
393  */
394  if(PsfSigma == 0.0)
395  {
396  for(i = 0; i < NumPixels; i++)
397  Phi[i] = 1.0f;
398  }
399  else
400  {
401  Sigma = (float)(Width/(2*M_PI*d*PsfSigma));
402  Denom = 2*Sigma*Sigma;
403  PhiLastRow = Phi + Width*(Height - 1);
404 
405  /* Construct the transform along x */
406  for(x = 0; x < Width; x++)
407  {
408  for(k = -NumOverlaps, Sum = 0; k < NumOverlaps; k++)
409  {
410  t = x + k*Width;
411  Sum += (float)exp(-(t*t)/Denom);
412  }
413 
414  PhiLastRow[x] = Sum;
415  }
416 
417  Sigma = (float)(Height/(2*M_PI*d*PsfSigma));
418  Denom = 2*Sigma*Sigma;
419 
420  /* Construct the transform along y */
421  for(y = 0, i = 0; y < Height; y++, i += Width)
422  {
423  for(k = -NumOverlaps, Sum = 0; k < NumOverlaps; k++)
424  {
425  t = y + k*Height;
426  Sum += (float)exp(-(t*t)/Denom);
427  }
428 
429  /* Tensor product */
430  for(x = 0; x < Width; x++)
431  Phi[x + i] = Sum*PhiLastRow[x];
432  }
433  }
434 
435  /* Square all the elements so that Temp = abs(fft2(psf)).^2 */
436  for(i = 0; i < NumPixels; i++)
437  Temp[i] = Phi[i]*Phi[i];
438 
439  SumAliases(Temp, d, Width, Height);
440 
441  /* Obtain the projection kernel Phi in the Fourier domain */
442  for(i = 0; i < NumPixels; i++)
443  Phi[i] /= (float)sqrt(Temp[i] * NumPixels);
444 
445  Free(Temp);
446 }
447 
448 
450 static void ImageSumAliases(float *u, int d, int Width, int Height,
451  int NumChannels)
452 {
453  const int BlockWidth = Width / d;
454  const int BlockWidth2 = 2*BlockWidth;
455  const int BlockHeight = Height / d;
456  const int StripSize = 2*Width*BlockHeight;
457  float *uChannel, *Src, *Dest;
458  int k, i, y, Channel;
459 
460 
461  for(Channel = 0; Channel < NumChannels; Channel++)
462  {
463  uChannel = u + 2*Width*Height*Channel;
464 
465  /* Sum along x for every y */
466  for(y = 0; y < Height; y++)
467  {
468  Dest = uChannel + 2*Width*y;
469 
470  for(k = 1; k < d; k++)
471  {
472  Src = Dest + 2*BlockWidth*k;
473 
474  for(i = 0; i < BlockWidth2; i++)
475  Dest[i] += Src[i];
476  }
477  }
478 
479  /* Sum along y for 0 <= x < BlockWidth */
480  for(k = 1; k < d; k++)
481  {
482  Dest = uChannel;
483  Src = uChannel + 2*Width*(BlockHeight*k);
484 
485  for(y = 0; y < BlockHeight; y++)
486  {
487  for(i = 0; i < BlockWidth2; i++)
488  Dest[i] += Src[i];
489 
490  Dest += 2*Width;
491  Src += 2*Width;
492  }
493  }
494 
495  /* Copy block [0, BlockWidth) x [0, BlockHeight) in the x direction */
496  for(y = 0, Src = Dest = uChannel; y < Height; y++, Src += 2*Width)
497  {
498  for(k = 1, Dest += BlockWidth2; k < d; k++, Dest += BlockWidth2)
499  {
500  memcpy(Dest, Src, sizeof(float)*BlockWidth2);
501  }
502  }
503 
504  /* Copy strip [0, OutputWidth) x [0, InputHeight) in the y direction */
505  for(k = 1, Dest = uChannel + StripSize; k < d; k++, Dest += StripSize)
506  memcpy(Dest, uChannel, sizeof(float)*StripSize);
507  }
508 }
509 
510 
512 static void MultiplyByPhiInterleaved(float *Dest, const float *Phi,
513  int NumPixels, int NumChannels)
514 {
515  int i, Channel;
516 
517 
518  for(Channel = 0; Channel < NumChannels; Channel++, Dest += 2*NumPixels)
519  {
520  for(i = 0; i < NumPixels; i++)
521  {
522  Dest[2*i] *= Phi[i];
523  Dest[2*i + 1] *= Phi[i];
524  }
525  }
526 }
527 
528 
530 static void MirrorSpectra(float *Dest, int Width, int Height, int NumChannels)
531 {
532  const int H = Width/2 + 1;
533  int i, sx, sy, x, y, Channel;
534 
535 
536  /* Fill in the redundant spectra */
537  for(Channel = 0, i = 0; Channel < NumChannels; Channel++)
538  {
539  for(y = 0; y < Height; y++)
540  {
541  sy = (y > 0) ? Height - y : 0;
542  sy = 2*Width*(sy + Height*Channel);
543 
544  for(x = H, i += 2*H; x < Width; x++, i += 2)
545  {
546  sx = 2*(Width - x);
547  /* Set Dest(x,y,Channel) = conj(Dest(sx,sy,Channel)) */
548  Dest[i] = Dest[sx + sy];
549  Dest[i + 1] = -Dest[sx + sy + 1];
550  }
551  }
552  }
553 }
554 
555 
557 static void Project(float *u, float *Temp, float *ConvTemp,
558  fftwf_plan ForwardPlan, fftwf_plan InversePlan, const float *u0,
559  const float *Phi, int ScaleFactor,
560  int OutputWidth, int OutputHeight, int Padding)
561 {
562  const int OutputNumPixels = OutputWidth*OutputHeight;
563  const int OutputNumEl = 3*OutputNumPixels;
564  int TransWidth, TransHeight, TransNumPixels;
565  int i, x, y, sx, sy, OffsetX, OffsetY, Channel;
566 
567 
568  TransWidth = OutputWidth + 2*Padding*ScaleFactor;
569  TransHeight = OutputHeight + 2*Padding*ScaleFactor;
570 
571  if(TransWidth > 2*OutputWidth)
572  TransWidth = 2*OutputWidth;
573  if(TransHeight > 2*OutputHeight)
574  TransHeight = 2*OutputHeight;
575 
576  TransNumPixels = TransWidth*TransHeight;
577  OffsetX = (TransWidth - OutputWidth)/2;
578  OffsetY = (TransHeight - OutputHeight)/2;
579  OffsetX -= OffsetX % ScaleFactor;
580  OffsetY -= OffsetY % ScaleFactor;
581 
582  for(Channel = 0, i = 0; Channel < OutputNumEl; Channel += OutputNumPixels)
583  {
584  for(y = 0; y < TransHeight; y++)
585  {
586  sy = y - OffsetY;
587 
588  while(1)
589  {
590  if(sy < 0)
591  sy = -1 - sy;
592  else if(sy >= OutputHeight)
593  sy = 2*OutputHeight - 1 - sy;
594  else
595  break;
596  }
597 
598  for(x = 0; x < TransWidth; x++, i++)
599  {
600  sx = x - OffsetX;
601 
602  while(1)
603  {
604  if(sx < 0)
605  sx = -1 - sx;
606  else if(sx >= OutputWidth)
607  sx = 2*OutputWidth - 1 - sx;
608  else
609  break;
610  }
611 
612  Temp[i] = u[sx + OutputWidth*sy + Channel]
613  - u0[sx + OutputWidth*sy + Channel];
614  }
615  }
616  }
617 
618  fftwf_execute(ForwardPlan);
619  MirrorSpectra(ConvTemp, TransWidth, TransHeight, 3);
620 
621  MultiplyByPhiInterleaved(ConvTemp, Phi, TransNumPixels, 3);
622  ImageSumAliases(ConvTemp, ScaleFactor, TransWidth, TransHeight, 3);
623  MultiplyByPhiInterleaved(ConvTemp, Phi, TransNumPixels, 3);
624 
625  fftwf_execute(InversePlan);
626 
627  /* Subtract a halved version of Temp from u */
628  Temp += OffsetX + TransWidth*OffsetY;
629 
630  for(Channel = 0, i = 0; Channel < 3; Channel++)
631  {
632  for(y = 0; y < OutputHeight; y++)
633  {
634  for(x = 0; x < OutputWidth; x++, i++)
635  {
636  u[i] -= Temp[x + TransWidth*y];
637  }
638  }
639 
640  Temp += TransNumPixels;
641  }
642 }
643 
644 
645 static float ComputeDiff(const float *u, const float *uLast,
646  int OutputNumEl)
647 {
648  float Temp, Diff = 0;
649  int i;
650 
651  for(i = 0; i < OutputNumEl; i++)
652  {
653  Temp = u[i] - uLast[i];
654  Diff += Temp*Temp;
655  }
656 
657  return sqrt(Diff/OutputNumEl);
658 }
659 
660 
694 int RoussosInterp(float *u, int OutputWidth, int OutputHeight,
695  const float *Input, int InputWidth, int InputHeight, double PsfSigma,
696  double K, float Tol, int MaxMethodIter, int DiffIter)
697 {
698  const int Padding = 5;
699  const int OutputNumPixels = OutputWidth*OutputHeight;
700  const int OutputNumEl = 3*OutputNumPixels;
701  float *u0 = NULL, *Txx = NULL, *Txy = NULL, *Tyy = NULL, *Temp = NULL,
702  *ConvTemp = NULL, *vx = NULL, *vy = NULL, *Phi = NULL, *uLast = NULL;
703  fftwf_plan ForwardPlan = 0, InversePlan = 0;
704  fftw_iodim Dims[2];
705  fftw_iodim HowManyDims[1];
706  filter PreSmooth = {NULL, 0, 0}, PostSmooth = {NULL, 0, 0};
707  float PreSmoothSigma, PostSmoothSigma, Diff;
708  unsigned long StartTime, StopTime;
709  int TransWidth, TransHeight, TransNumPixels;
710  int Iter, ScaleFactor, Success = 0;
711 
712 
713  ScaleFactor = OutputWidth / InputWidth;
714  PreSmoothSigma = 0.3f * ScaleFactor;
715  PostSmoothSigma = 0.4f * ScaleFactor;
716 
717  TransWidth = OutputWidth + 2*Padding*ScaleFactor;
718  TransHeight = OutputHeight + 2*Padding*ScaleFactor;
719 
720  if(TransWidth > 2*OutputWidth)
721  TransWidth = 2*OutputWidth;
722  if(TransHeight > 2*OutputHeight)
723  TransHeight = 2*OutputHeight;
724 
725  TransNumPixels = TransWidth*TransHeight;
726 
727  printf("Initial interpolation\n");
728 
729  if(!FourierScale2d(u, OutputWidth, 0, OutputHeight, 0,
730  Input, InputWidth, InputHeight, 3, PsfSigma, BOUNDARY_HSYMMETRIC))
731  goto Catch;
732  else if(MaxMethodIter <= 0)
733  {
734  Success = 1;
735  goto Catch;
736  }
737 
738  /* Allocate a fantastic amount of memory */
739  if(ScaleFactor <= 1
740  || OutputWidth != ScaleFactor*InputWidth
741  || OutputHeight != ScaleFactor*InputHeight
742  || !(ConvTemp = (float *)fftwf_malloc(sizeof(float)*24*OutputNumPixels))
743  || !(Temp = (float *)fftwf_malloc(sizeof(float)*12*OutputNumPixels))
744  || !(Phi = (float *)Malloc(sizeof(float)*4*OutputNumPixels))
745  || !(u0 = (float *)Malloc(sizeof(float)*3*OutputNumPixels))
746  || !(uLast = (float *)Malloc(sizeof(float)*3*OutputNumPixels))
747  || !(Txx = (float *)Malloc(sizeof(float)*OutputNumPixels))
748  || !(Txy = (float *)Malloc(sizeof(float)*OutputNumPixels))
749  || !(Tyy = (float *)Malloc(sizeof(float)*OutputNumPixels))
750  || !(vx = (float *)Malloc(sizeof(float)*OutputNumPixels))
751  || !(vy = (float *)Malloc(sizeof(float)*OutputNumPixels))
752  || IsNullFilter(PreSmooth = GaussianFilter(PreSmoothSigma,
753  (int)ceil(2.5*PreSmoothSigma)))
754  || IsNullFilter(PostSmooth = GaussianFilter(PostSmoothSigma,
755  (int)ceil(2.5*PostSmoothSigma))))
756  goto Catch;
757 
758  /* All arrays in the main computation are in planar order so that data
759  access in convolutions and DFTs are more localized. */
760 
761  HowManyDims[0].n = 3;
762  HowManyDims[0].is = TransNumPixels;
763  HowManyDims[0].os = TransNumPixels;
764 
765  Dims[0].n = TransHeight;
766  Dims[0].is = TransWidth;
767  Dims[0].os = TransWidth;
768  Dims[1].n = TransWidth;
769  Dims[1].is = 1;
770  Dims[1].os = 1;
771 
772  /* Create plans for the 2D DFT of Src (vectorized over channels).
773  * After applying the forward transform,
774  * Dest[2*(x + Width*(y + Height*k))] = real component of (x,y,k)th
775  * Dest[2*(x + Width*(y + Height*k)) + 1] = imag component of (x,y,k)th
776  * where for 0 <= x < Width/2 + 1, 0 <= y < Height.
777  */
778  if(!(ForwardPlan = fftwf_plan_guru_dft_r2c(2, Dims, 1, HowManyDims,
779  Temp, (fftwf_complex *)ConvTemp, FFTW_ESTIMATE | FFTW_DESTROY_INPUT))
780  || !(InversePlan = fftwf_plan_guru_dft_c2r(2, Dims, 1, HowManyDims,
781  (fftwf_complex *)ConvTemp, Temp, FFTW_ESTIMATE | FFTW_DESTROY_INPUT)))
782  goto Catch;
783 
784  printf("Roussos-Maragos interpolation\n");
785  StartTime = Clock();
786 
787  MakePhi(Phi, PsfSigma, ScaleFactor, TransWidth, TransHeight);
788  memcpy(u0, u, sizeof(float)*3*OutputNumPixels);
789 
790  /* Projected tensor-driven diffusion main loop */
791  for(Iter = 1; Iter <= MaxMethodIter; Iter++)
792  {
793  memcpy(uLast, u, sizeof(float)*OutputNumEl);
794 
795  ComputeTensor(Txx, Txy, Tyy, Temp, ConvTemp, u,
796  OutputWidth, OutputHeight, PreSmooth, PostSmooth, K);
797 
798  DiffuseWithTensor(u, vx, vy, Temp, Temp + OutputNumPixels, ConvTemp,
799  Txx, Txy, Tyy, OutputWidth, OutputHeight, DiffIter);
800 
801  Project(u, Temp, ConvTemp, ForwardPlan, InversePlan, u0, Phi,
802  ScaleFactor, OutputWidth, OutputHeight, Padding);
803 
804  Diff = ComputeDiff(u, uLast, OutputNumEl);
805 
806  if(Iter >= 2 && Diff <= Tol)
807  {
808  printf("Converged in %d iterations.\n", Iter);
809  break;
810  }
811  }
812 
813  StopTime = Clock();
814 
815  if(Diff > Tol)
816  printf("Maximum number of iterations exceeded.\n");
817 
818  printf("CPU Time: %.3f s\n\n", 0.001*(StopTime - StartTime));
819  Success = 1;
820 Catch:
821  fftwf_destroy_plan(InversePlan);
822  fftwf_destroy_plan(ForwardPlan);
823  fftwf_cleanup();
824  Free(PostSmooth.Coeff);
825  Free(PreSmooth.Coeff);
826  Free(vy);
827  Free(vx);
828  Free(Tyy);
829  Free(Txy);
830  Free(Txx);
831  Free(uLast);
832  Free(u0);
833  Free(Phi);
834 
835  if(Temp)
836  fftwf_free(Temp);
837  if(ConvTemp)
838  fftwf_free(ConvTemp);
839  return Success;
840 }