78 #define NEIGHDIAMETER (2*NEIGHRADIUS+1)
79 #define NUMNEIGH (NEIGHDIAMETER*NEIGHDIAMETER)
89 #define CORRECTION_IGNOREBITS 3
92 #define INPUT_FRACBITS 8
94 #define PSI_FRACBITS 12
96 #define OUTPUT_FRACBITS (INPUT_FRACBITS + PSI_FRACBITS)
99 #define FIXED_ONE(N) (1 << (N))
101 #define FIXED_HALF(N) (1 << ((N) - 1))
114 #define PIXEL_STRIDE 3
120 #define CLAMP(X,A,B) (((X) < (A)) ? (A) : (((X) > (B)) ? (B) : (X)))
123 #define ROUNDCLAMP(X,A,B) (((X) < (A)) ? (A) : (((X) > (B)) ? (B) : ROUND(X)))
127 #define M_2PI 6.283185307179586476925286766559
132 static const double StencilOrientation[
NUMSTENCILS] = {-1.178097245096172,
133 -0.785398163397448, -0.392699081698724, 0.0, 0.392699081698724,
134 0.785398163397448, 1.178097245096172, 1.570796326794897};
138 static double Psf(
double x,
double y,
cwparams Param)
141 return exp(-(x*x + y*y)/(2.0*SigmaSqr))/(
M_2PI*SigmaSqr);
146 static double Phi(
double x,
double y,
147 double theta,
double PhiSigmaTangent,
double PhiSigmaNormal)
152 t = (cos(theta)*x + sin(theta)*y) / PhiSigmaTangent;
153 n = (-sin(theta)*x + cos(theta)*y) / PhiSigmaNormal;
155 return exp(-0.5*(t*t + n*n));
160 static float CubicBSpline(
float x)
165 return (x/2 - 1)*x*x + 0.66666666666666667f;
177 static double Window(
double x,
double y)
185 if(-2.0 < x && x < 2.0 && -2.0 < y && y < 2.0)
188 Temp = fabs(1.0 - x);
189 x = 1.0 - x + (x*x*x - 2.0*Temp*Temp*Temp)/6.0;
192 Temp = fabs(1.0 - y);
193 y = 1.0 - y + (y*y*y - 2.0*Temp*Temp*Temp)/6.0;
202 static void QuadraturePoint(
double *Weight,
double *Abscissa,
int Index,
int NumPanels)
207 *Weight = (Index == 0 || Index == NumPanels)? 0.25 : 0.5;
213 *Abscissa = Index - 1.70820393249936919e-1;
218 *Abscissa = Index + 1.70820393249936919e-1;
225 static double PsfPhiConvolution(
int x,
int y,
double Theta,
cwparams Param)
228 const double R = 4.0*Param.
PsfSigma;
230 const int NumPanels = 3*16;
231 const double PanelSize = 2.0*R/NumPanels;
232 double Integral = 0.0, Slice = 0.0;
242 for(IndexY = 0; IndexY <= NumPanels; IndexY++)
244 QuadraturePoint(&wv, &v, IndexY, NumPanels);
247 for(Slice = 0.0, IndexX = 0; IndexX <= NumPanels; IndexX++)
249 QuadraturePoint(&wu, &u, IndexX, NumPanels);
251 Slice += wu*( Psf(u, v, Param) *
256 Integral += wv*Slice;
259 Integral *= PanelSize*PanelSize;
265 static int ComputeMatrices(
double *InverseA,
cwparams Param)
268 int m, mx, my, n, nx, ny, S;
282 A[m + NUMNEIGH*n] = PsfPhiConvolution(mx - nx, my - ny,
283 StencilOrientation[S], Param);
287 if(!
InvertMatrix(InverseA + S*(NUMNEIGH*NUMNEIGH), A, NUMNEIGH))
322 const int ScaleFactor = (int)ceil(Param.
ScaleFactor);
323 const int SupportRadius = (
NEIGHRADIUS+1)*ScaleFactor - 1;
324 const int SupportWidth = 2*SupportRadius + 1;
325 const int SupportSize = SupportWidth*SupportWidth;
326 double *InverseA = NULL;
327 double x, y, Wxy, Psi0, Psim, XStart, YStart, WSum;
328 int S, sx, sy, i, m0, m, mx, my, n, nx, ny, Success = 0;
331 if(!(Psi = (int32_t *)
Malloc(
sizeof(int32_t)*SupportSize*NUMNEIGH*NUMSTENCILS))
332 || !(InverseA = (
double *)
Malloc(
sizeof(
double)*NUMNEIGH*NUMNEIGH*NUMSTENCILS)))
336 if(!ComputeMatrices(InverseA, Param))
351 for(i = 0, sy = -SupportRadius; sy <= SupportRadius; sy++)
352 for(sx = -SupportRadius; sx <= SupportRadius; sx++, i++)
358 for(ny = -(
int)floor((sy + SupportRadius)/ScaleFactor), WSum = 0;
359 ny <= (2*
NEIGHRADIUS + 1) && sy + ny*ScaleFactor <= SupportRadius; ny++)
360 for(nx = -(
int)floor((sx + SupportRadius)/ScaleFactor);
361 nx <= (2*
NEIGHRADIUS + 1) && sx + nx*ScaleFactor <= SupportRadius; nx++)
363 x = XStart + nx + ((double)sx)/((double)ScaleFactor);
364 y = YStart + ny + ((double)sy)/((double)ScaleFactor);
369 x = XStart + ((double)sx)/((double)ScaleFactor);
370 y = YStart + ((double)sy)/((double)ScaleFactor);
371 Psi0 = Wxy = Window(x, y);
382 Psim += InverseA[m + NUMNEIGH*(n + NUMNEIGH*S)]
383 * Phi(x - nx, y - ny,
390 Psi[i + SupportSize*(m + NUMNEIGH*S)] =
395 Psi[i + SupportSize*(m0 + NUMNEIGH*S)] =
428 static void CWFirstPass(int32_t *Interpolation,
int ScaleFactor,
const int32_t *Input,
429 int InputWidth,
int InputHeight,
const int *Stencil,
const int32_t *Psi)
431 const int32_t *PsiPtr, *SrcWindow;
435 const int SampleRange = (
NEIGHRADIUS+1)*ScaleFactor - 1;
436 const int SampleWidth = 2*SampleRange + 1;
438 const int OutputWidth = ScaleFactor*InputWidth;
439 const int DestWindowJump =
PIXEL_STRIDE*(OutputWidth - SampleWidth);
441 const int DestJump =
PIXEL_STRIDE*(ScaleFactor-1)*OutputWidth + 2*Pad*DestStep;
444 const int StencilJump = 2*Pad;
446 int x, y, NeighX, NeighY, SampleX, SampleY;
450 Interpolation +=
PIXEL_STRIDE*(Pad*ScaleFactor - SampleRange)*(1 + OutputWidth);
452 Stencil += Pad*(1 + InputWidth);
454 for(y = InputHeight - 2*Pad; y; y--,
455 Stencil += StencilJump, Input += SrcJump, Interpolation += DestJump)
456 for(x = InputWidth - 2*Pad; x; x--,
457 Stencil++, Input +=
PIXEL_STRIDE, Interpolation += DestStep)
459 PsiPtr = Psi + *Stencil;
462 for(NeighY =
NEIGHDIAMETER; NeighY; NeighY--, SrcWindow += SrcWindowJump)
468 DestWindow = Interpolation;
469 SampleY = SampleWidth;
471 for(SampleY = SampleWidth; SampleY;
472 SampleY--, DestWindow += DestWindowJump, PsiPtr += SampleWidth)
473 for(SampleX = 0; SampleX < SampleWidth;
476 int32_t Temp = PsiPtr[SampleX];
477 DestWindow[0] += cr * Temp;
478 DestWindow[1] += cg * Temp;
479 DestWindow[2] += cb * Temp;
501 static void CWRefinementPass(int32_t *Interpolation,
int ScaleFactor,
502 const int32_t *Residual,
int InputWidth,
int InputHeight,
503 const int *Stencil,
const int32_t *Sample)
507 const int32_t *SamplePtr, *SrcWindow;
509 const int SampleRange = (
NEIGHRADIUS+1)*ScaleFactor - 1;
510 const int SampleWidth = 2*SampleRange + 1;
511 const int SampleSize = SampleWidth*SampleWidth;
513 const int OutputWidth = ScaleFactor*InputWidth;
514 const int DestWindowJump =
PIXEL_STRIDE*(OutputWidth - SampleWidth);
516 const int DestJump =
PIXEL_STRIDE*(ScaleFactor-1)*OutputWidth + 2*Pad*DestStep;
519 const int StencilJump = 2*Pad;
521 int x, y, NeighX, NeighY, SampleX, SampleY;
525 Interpolation +=
PIXEL_STRIDE*(Pad*ScaleFactor - SampleRange)*(1 + OutputWidth);
527 Stencil += Pad*(1 + InputWidth);
529 for(y = InputHeight - 2*Pad; y; y--,
530 Stencil += StencilJump, Residual += SrcJump, Interpolation += DestJump)
531 for(x = InputWidth - 2*Pad; x; x--,
532 Stencil++, Residual +=
PIXEL_STRIDE, Interpolation += DestStep)
534 SamplePtr = Sample + *Stencil;
535 SrcWindow = Residual;
537 for(NeighY =
NEIGHDIAMETER; NeighY; NeighY--, SrcWindow += SrcWindowJump)
548 DestWindow = Interpolation;
549 SampleY = SampleWidth;
551 for(SampleY = SampleWidth; SampleY;
552 SampleY--, DestWindow += DestWindowJump, SamplePtr += SampleWidth)
553 for(SampleX = 0; SampleX < SampleWidth;
556 int32_t Temp = SamplePtr[SampleX];
557 DestWindow[0] += cr * Temp;
558 DestWindow[1] += cg * Temp;
559 DestWindow[2] += cb * Temp;
563 SamplePtr += SampleSize;
575 static int ConstExtension(
int N,
int i)
586 static float Sqr(
float x)
593 static int32_t CWResidual(int32_t *Residual,
const int32_t *Interpolation,
594 const int32_t *Input,
int CoarseWidth,
int CoarseHeight,
cwparams Param)
597 const int ScaleFactor = (int)ceil(Param.
ScaleFactor);
598 const int InterpWidth = ScaleFactor*CoarseWidth;
599 const int InterpHeight = ScaleFactor*CoarseHeight;
600 const int CoarseStride = 3*CoarseWidth;
601 const float PsfRadius = (float)(4*Param.
PsfSigma*ScaleFactor);
602 const int PsfWidth = (int)ceil(2*PsfRadius);
603 float *Temp = NULL, *PsfBuf = NULL;
604 float ExpDenom, Weight, Sum[3], DenomSum;
605 float XStart, YStart, X, Y;
606 int IndexX0, IndexY0, SrcOffset, DestOffset;
607 int x, y, i, n, c, Success = 0;
612 if(!(Temp = (
float *)
Malloc(
sizeof(
float)*3*CoarseWidth*InterpHeight))
613 || !(PsfBuf = (
float *)
Malloc(
sizeof(
float)*PsfWidth)))
618 XStart = (1.0f/ScaleFactor - 1)/2;
619 YStart = (1.0f/ScaleFactor - 1)/2;
625 ExpDenom = 2 *
Sqr((
float)(Param.
PsfSigma*ScaleFactor));
627 ExpDenom = 2 *
Sqr(1e-2f*ScaleFactor);
629 if(Pad < ScaleFactor)
632 for(x = 0; x < CoarseWidth; x++)
634 X = (-XStart + x)*ScaleFactor;
635 IndexX0 = (int)ceil(X - PsfRadius);
638 for(n = 0; n < PsfWidth; n++)
639 PsfBuf[n] = (
float)exp(-
Sqr(X - (IndexX0 + n)) / ExpDenom);
641 for(y = 0, SrcOffset = 0, DestOffset = 3*x; y < InterpHeight;
642 y++, SrcOffset += InterpWidth, DestOffset += CoarseStride)
644 Sum[0] = Sum[1] = Sum[2] = DenomSum = 0;
646 for(n = 0; n < PsfWidth; n++)
650 i = 3*(ConstExtension(InterpWidth, IndexX0 + n) + SrcOffset);
652 for(c = 0; c < 3; c++)
653 Sum[c] += Weight * Interpolation[i + c];
656 for(c = 0; c < 3; c++)
657 Temp[DestOffset + c] = Sum[c] / DenomSum;
662 x2 = CoarseStride - 3*Pad;
664 for(y = 0; y < CoarseHeight; y++,
665 Residual += CoarseStride, Input += CoarseStride)
667 if(!(y >= Pad && y < CoarseHeight-Pad))
670 Y = (-YStart + y)*ScaleFactor;
671 IndexY0 = (int)ceil(Y - PsfRadius);
674 for(n = 0; n < PsfWidth; n++)
675 PsfBuf[n] = (
float)exp(-
Sqr(Y - (IndexY0 + n)) / ExpDenom);
677 for(x = x1; x < x2; x += 3)
679 Sum[0] = Sum[1] = Sum[2] = DenomSum = 0;
681 for(n = 0; n < PsfWidth; n++)
683 SrcOffset = x + CoarseStride*ConstExtension(InterpHeight, IndexY0 + n);
687 for(c = 0; c < 3; c++)
688 Sum[c] += Weight * Temp[SrcOffset + c];
693 for(c = 0; c < 3; c++)
695 Sum[c] = Input[x + c] - Sum[c] / DenomSum;
696 Residual[x + c] = (int32_t)
ROUND(Sum[c]);
698 if(abs(Residual[x + c]) > ResNorm)
699 ResNorm = abs(Residual[x + c]);
708 return (Success) ? ResNorm : -1;
730 static void ConvertInput(int32_t *FixedRgb,
const uint32_t *Input,
int InputWidth,
731 int InputHeight,
int Padding)
733 const uint8_t *InputPtr = (uint8_t *)Input;
734 const int InputStride = 4*InputWidth;
735 const int Stride =
PIXEL_STRIDE*(InputWidth + 2*Padding);
740 FixedRgb += Padding*Stride;
742 for(Row = InputHeight; Row; Row--, InputPtr += InputStride)
749 for(i = Padding; i; i--)
757 for(i = 0; i < InputStride; i += 4)
769 for(i = Padding; i; i--)
778 for(Row = Padding; Row; Row--, FixedRgb += Stride)
779 memcpy(FixedRgb, FixedRgb - Stride,
sizeof(int32_t)*Stride);
781 FixedRgb -= Stride*(InputHeight + Padding);
784 for(Row = Padding; Row; Row--, FixedRgb -= Stride)
785 memcpy(FixedRgb - Stride, FixedRgb,
sizeof(int32_t)*Stride);
812 static void ConvertOutput(uint32_t *Output,
int OutputWidth,
int OutputHeight,
813 const int32_t *FixedRgb,
int Width)
815 uint8_t *OutputPtr = (uint8_t *)Output;
821 for(Row = OutputHeight; Row; Row--, FixedRgb += Stride)
830 *(OutputPtr++) =
CLAMP(r, 0, 255);
831 *(OutputPtr++) =
CLAMP(g, 0, 255);
832 *(OutputPtr++) =
CLAMP(b, 0, 255);
833 *(OutputPtr++) = 0xFF;
847 int CWInterp(uint32_t *Output,
const uint32_t *Input,
848 int InputWidth,
int InputHeight,
const int32_t *Psi,
cwparams Param)
850 const int ScaleFactor = (int)ceil(Param.
ScaleFactor);
851 const int SupportRadius = (
NEIGHRADIUS+1)*ScaleFactor - 1;
852 const int SupportWidth = 2*SupportRadius + 1;
853 const int SupportSize = SupportWidth*SupportWidth;
854 const int StencilMul = NUMNEIGH*SupportSize;
856 int32_t *InputFixed = NULL, *OutputFixed = NULL, *Residual = NULL;
857 unsigned long StartTime, StopTime;
858 int i, PadInput, pw, ph, Success = 0;
866 PadInput = 4 + (ScaleFactor + 1)/2;
867 pw = InputWidth + 2*PadInput;
868 ph = InputHeight + 2*PadInput;
870 if( !(OutputFixed = (int32_t *)
Malloc(
sizeof(int32_t)*
874 || !(Stencil = (
int *)
Malloc(
sizeof(
int)*pw*ph)) )
881 ConvertInput(InputFixed, Input, InputWidth, InputHeight, PadInput);
884 if(!
FitStencils(Stencil, InputFixed, pw, ph, StencilMul))
887 memset(OutputFixed, 0,
sizeof(int32_t)*
888 3*pw*ScaleFactor*ph*ScaleFactor);
889 memset(Residual, 0,
sizeof(int32_t)*3*pw*ph);
891 printf(
"\n Iteration Residual norm\n -------------------------\n");
894 CWFirstPass(OutputFixed, ScaleFactor, InputFixed, pw, ph, Stencil, Psi);
900 if((ResNorm = CWResidual(Residual, OutputFixed, InputFixed,
901 pw, ph, Param)) < 0.0)
904 printf(
" %8d %15.8f\n", i, ResNorm/(255.0*256.0));
907 CWRefinementPass(OutputFixed, ScaleFactor, Residual, pw, ph, Stencil, Psi);
911 ConvertOutput(Output, InputWidth*ScaleFactor, InputHeight*ScaleFactor,
912 OutputFixed +
PIXEL_STRIDE*PadInput*ScaleFactor*(1 + pw*ScaleFactor),
921 ResNorm = CWResidual(Residual, OutputFixed, InputFixed, pw, ph, Param);
922 printf(
" %8d %15.8f\n\n", Param.
RefinementSteps + 1, ResNorm/(255.0*256.0));
925 printf(
" CPU time: %.3f s\n\n", 0.001*(StopTime - StartTime));
941 #define ROUNDCLAMP(X,A,B) (((X) < (A)) ? (A) : (((X) > (B)) ? (B) : ROUND(X)))
954 #define XY_FRACBITS 15
955 #define TRIG_FRACBITS 10
956 #define PHITN_FRACBITS 9
957 #define PHI_FRACBITS 10
958 #define COEFF_FRACBITS 10
959 #define WINDOW_FRACBITS 10
960 #define UK_FRACBITS 10
962 #define ROUND_FIXED(X,N) (((X) + FIXED_HALF(N)) >> (N))
963 #define FLOAT_TO_FIXED(X,N) ((int32_t)ROUND((X) * FIXED_ONE(N)))
967 const int32_t *Input,
int InputWidth,
int InputHeight,
968 const int *Stencil,
const double *InverseA,
cwparams Param)
971 int (*Extension)(int, int) = ConstExtension;
972 const int InputNumEl = 3*InputWidth*InputHeight;
973 const int ExpTableSize = 1024;
974 const double ExpArgScale = 37.0236;
976 const double PhiNScale = sqrt(ExpArgScale/2)/Param.
PhiSigmaNormal;
978 int32_t *Coeff = NULL, *CoeffPtr, *ExpTable = NULL;
980 float X, Y, XStart, YStart;
982 int32_t v0[3], v[3], WindowWeight, WindowWeightX[4], WindowWeightY[4];
983 int32_t Xpf, Ypf, Weight, uk[3], u[3], DenomSum;
987 int i, k, x, y, m, n, mx, my, nx, ny, S, Success = 0;
988 int ix, iy, Cur, Offset;
991 if(!(Coeff = (int32_t *)
Malloc(
sizeof(int32_t)*(NUMNEIGH + 1)*InputNumEl))
992 || !(ExpTable = (int32_t *)
Malloc(
sizeof(int32_t)*ExpTableSize)))
1001 XStart = YStart = 0;
1015 for(i = 0; i < ExpTableSize; i++)
1018 for(y = 0, k = 0, CoeffPtr = Coeff; y < InputHeight; y++)
1019 for(x = 0; x < InputWidth; x++, k++, CoeffPtr += 3*(NUMNEIGH + 1))
1021 S = NUMNEIGH*Stencil[k];
1023 v0[0] = Input[3*k + 0];
1024 v0[1] = Input[3*k + 1];
1025 v0[2] = Input[3*k + 2];
1036 Offset = InputWidth*Extension(InputHeight, y + ny);
1042 i = 3*(Extension(InputWidth, x + nx) + Offset);
1043 v[0] = Input[i + 0];
1044 v[1] = Input[i + 1];
1045 v[2] = Input[i + 2];
1049 Temp = (float)InverseA[m + NUMNEIGH*(n + S)];
1050 cr[m] += Temp * (v[0] - v0[0]);
1051 cg[m] += Temp * (v[1] - v0[1]);
1052 cb[m] += Temp * (v[2] - v0[2]);
1076 for(y = 0, k = 0; y < OutputHeight; y++)
1086 for(x = 0; x < OutputWidth; x++, k++)
1089 ix = (int)ceil(X - 2*NEIGHRADIUS);
1097 u[0] = u[1] = u[2] = 0;
1101 if((iy + my) >= 0 && (iy + my) < InputHeight)
1104 i = ix + InputWidth*(iy + my), CoeffPtr = Coeff + i*3*(NUMNEIGH + 1);
1107 if((ix + mx) < 0 || (ix + mx) >= InputWidth)
1111 WindowWeight = (WindowWeightX[mx] * WindowWeightY[my]);
1113 DenomSum += WindowWeight;
1123 uk[0] = CoeffPtr[0];
1124 uk[1] = CoeffPtr[1];
1125 uk[2] = CoeffPtr[2];
1127 for(ny = -NEIGHRADIUS, n = 3; ny <=
NEIGHRADIUS; ny++)
1128 for(nx = -NEIGHRADIUS; nx <=
NEIGHRADIUS; nx++, n += 3)
1152 if(Cur >= ExpTableSize)
1157 Weight = ExpTable[Cur];
1163 uk[0] += (CoeffPtr[n + 0] * Weight
1166 uk[1] += (CoeffPtr[n + 1] * Weight
1169 uk[2] += (CoeffPtr[n + 2] * Weight
1175 u[0] += WindowWeight * uk[0];
1176 u[1] += WindowWeight * uk[1];
1177 u[2] += WindowWeight * uk[2];
1191 #if WINDOW_FRACBITS > UK_FRACBITS
1197 u[0] = (u[0] + DenomSum/2) / DenomSum;
1198 u[1] = (u[1] + DenomSum/2) / DenomSum;
1199 u[2] = (u[2] + DenomSum/2) / DenomSum;
1203 ((uint8_t *)&Pixel)[0] =
CLAMP(u[0],0,255);
1204 ((uint8_t *)&Pixel)[1] =
CLAMP(u[1],0,255);
1205 ((uint8_t *)&Pixel)[2] =
CLAMP(u[2],0,255);
1219 static void AddResidual(int32_t *InputAdjusted,
const int32_t *Residual,
1220 int InputWidth,
int InputHeight,
int PadInput)
1222 const int PadWidth = InputWidth + 2*PadInput;
1223 const int RowEl = 3*InputWidth;
1224 const int PadRowEl = 3*PadWidth;
1227 Residual += 3*(PadInput + PadInput*PadWidth);
1229 for(Row = InputHeight; Row; Row--)
1231 for(i = 0; i < RowEl; i++)
1232 InputAdjusted[i] += Residual[i];
1234 InputAdjusted += RowEl;
1235 Residual += PadRowEl;
1241 static void StencilStripPad(
int *Stencil,
1242 int InputWidth,
int InputHeight,
int PadInput,
int StencilMul)
1244 const int PadWidth = InputWidth + 2*PadInput;
1248 Src = Stencil + (PadInput + PadInput*PadWidth);
1250 for(Row = InputHeight; Row; Row--)
1252 for(i = 0; i < InputWidth; i++)
1253 Stencil[i] = Src[i] / StencilMul;
1255 Stencil += InputWidth;
1270 int CWInterpEx(uint32_t *Output,
int OutputWidth,
int OutputHeight,
1271 const uint32_t *Input,
int InputWidth,
int InputHeight,
1272 const int32_t *Psi,
cwparams Param)
1274 const int ScaleFactor = (int)ceil(Param.
ScaleFactor);
1275 const int SupportRadius = (
NEIGHRADIUS+1)*ScaleFactor - 1;
1276 const int SupportWidth = 2*SupportRadius + 1;
1277 const int SupportSize = SupportWidth*SupportWidth;
1278 const int StencilMul = NUMNEIGH*SupportSize;
1279 double *InverseA = NULL;
1280 int *Stencil = NULL;
1281 int32_t *InputFixed = NULL, *InputAdjusted = NULL, *OutputFixed = NULL, *Residual = NULL;
1282 unsigned long StartTime, StopTime;
1283 int i, PadInput, pw, ph, Success = 0;
1291 PadInput = 4 + (ScaleFactor + 1)/2;
1292 pw = InputWidth + 2*PadInput;
1293 ph = InputHeight + 2*PadInput;
1295 if( !(InverseA = (
double *)
Malloc(
sizeof(
double)*NUMNEIGH*NUMNEIGH*NUMSTENCILS))
1296 || !(OutputFixed = (int32_t *)
Malloc(
sizeof(int32_t)*
1299 || !(InputAdjusted = (int32_t *)
Malloc(
sizeof(int32_t)*
PIXEL_STRIDE*InputWidth*InputHeight))
1301 || !(Stencil = (
int *)
Malloc(
sizeof(
int)*pw*ph)) )
1304 if(!ComputeMatrices(InverseA, Param))
1308 StartTime =
Clock();
1313 ConvertInput(InputFixed, Input, InputWidth, InputHeight, PadInput);
1315 memset(InputAdjusted, 0,
sizeof(int32_t)*3*InputWidth*InputHeight);
1316 AddResidual(InputAdjusted, InputFixed, InputWidth, InputHeight, PadInput);
1319 if(!
FitStencils(Stencil, InputFixed, pw, ph, StencilMul))
1322 memset(OutputFixed, 0,
sizeof(int32_t)*
1323 3*pw*ScaleFactor*ph*ScaleFactor);
1324 memset(Residual, 0,
sizeof(int32_t)*3*pw*ph);
1326 printf(
"\n Iteration Residual norm\n -------------------------\n");
1329 CWFirstPass(OutputFixed, ScaleFactor, InputFixed, pw, ph, Stencil, Psi);
1335 if((ResNorm = CWResidual(Residual, OutputFixed, InputFixed,
1336 pw, ph, Param)) < 0.0)
1339 printf(
" %8d %15.8f\n", i, ResNorm/(255.0*256.0));
1341 AddResidual(InputAdjusted, Residual, InputWidth, InputHeight, PadInput);
1346 CWRefinementPass(OutputFixed, ScaleFactor, Residual, pw, ph, Stencil, Psi);
1350 StencilStripPad(Stencil, InputWidth, InputHeight, PadInput, StencilMul);
1355 ConvertInput(InputAdjusted, Input, InputWidth, InputHeight, 0);
1358 if(!
FitStencils(Stencil, InputAdjusted, InputWidth, InputHeight, 1))
1363 InputAdjusted, InputWidth, InputHeight, Stencil, InverseA, Param))
1373 printf(
" CPU time: %.3f s\n\n", 0.001*(StopTime - StartTime));
1382 Free(InputAdjusted);
1392 uint32_t *Input,
int InputWidth,
int InputHeight,
cwparams Param)
1395 const float LineColor[3] = {0, 0, 0};
1398 int32_t *InputInt = NULL;
1400 int x, y, S, pw, ph, Success = 0;
1403 pw = InputWidth + 2*Pad;
1404 ph = InputHeight + 2*Pad;
1406 if( !(InputInt = (int32_t *)
Malloc(
sizeof(int32_t)*3*pw*ph))
1407 || !(Stencil = (
int *)
Malloc(
sizeof(
int)*pw*ph)) )
1411 ConvertInput(InputInt, Input, InputWidth, InputHeight, Pad);
1418 for(y = 0; y < InputHeight; y++)
1419 for(x = 0; x < InputWidth; x++)
1421 Pixel = Input[x + InputWidth*y];
1422 ((uint8_t*)&Pixel)[0] = (uint8_t)(((uint8_t*)&Pixel)[0]/2 + 128);
1423 ((uint8_t*)&Pixel)[1] = (uint8_t)(((uint8_t*)&Pixel)[1]/2 + 128);
1424 ((uint8_t*)&Pixel)[2] = (uint8_t)(((uint8_t*)&Pixel)[2]/2 + 128);
1425 Input[x + InputWidth*y] = Pixel;
1430 Input, InputWidth, InputHeight,
1434 for(y = 0; y < InputHeight; y++)
1435 for(x = 0; x < InputWidth; x++)
1437 S = Stencil[(x + Pad) + pw*(y + Pad)];
1438 dx = (float)cos(StencilOrientation[S])*0.6f;
1439 dy = (float)sin(StencilOrientation[S])*0.6f;
1440 DrawLine(Output, OutputWidth, OutputHeight,
1444 (
float)Param.
ScaleFactor*(y + dy + 0.5f) - 0.5f, LineColor);