29 int width,
int height,
float alpha,
const char *omega_string,
32 int width,
int height,
float alpha,
const char *omega_string,
int degree);
34 float *omega,
int width,
int height,
const char *omega_string);
35 static void get_min_max(
float *min_ptr,
float *max_ptr,
36 const float *data,
size_t num_samples);
37 static void int_pow(
float *dest,
const float *src,
size_t num_samples,
int m);
38 static void stretch(
float *image,
long num_pixels);
39 static void convolve(
float *blurred_trans,
const float *omega_trans,
40 long num_pixels, fftwf_plan forward_plan, fftwf_plan inverse_plan);
42 fftwf_plan (**plans_ptr)[2],
int width,
int height,
int num_buffers);
44 fftwf_plan (*plans)[2],
int num_buffers);
79 int width,
int height,
float alpha,
const char *omega_string,
82 const long num_pixels = ((long)width) * ((long)height);
83 const long pad_num_pixels = ((long)width + 1) * ((long)height + 1);
85 const int num_threads = omp_get_max_threads();
87 const int num_threads = 1;
89 const int num_buffers = num_threads + 1;
90 fftwf_plan (*plans)[2] = NULL;
91 float **buffers = NULL, *omega_trans = NULL;
92 int *buffer_levels = NULL, *buffer_channels = NULL;
93 float min[3], max[3], level_step[3];
95 int thread, num_conv, index, steps, channel, success = 0;
98 if(!(omega_trans = (
float *)fftwf_malloc(
sizeof(
float)*pad_num_pixels))
101 width, height, num_buffers)
102 || !(buffer_levels = (
int *)malloc(
sizeof(
int)*num_buffers))
103 || !(buffer_channels = (
int *)malloc(
sizeof(
int)*num_buffers)))
106 for(i = 0; i < num_pixels * 3; i++)
111 #pragma omp parallel for schedule(dynamic) private(i)
113 for(channel = 0; channel < 3; channel++)
116 f + num_pixels * channel, num_pixels);
118 if(min[channel] >= max[channel])
124 level_step[channel] = (max[channel] - min[channel])
128 for(i = 0; i < num_buffers; i++)
129 buffer_levels[i] = buffer_channels[i] = -1;
139 for(steps = 0; steps < 3*num_levels;)
141 if(steps + num_threads > 3*num_levels)
142 num_conv = 3*num_levels - steps;
143 else if(steps == 0 && num_threads == 1)
146 num_conv = num_threads;
150 #pragma omp parallel for schedule(dynamic) private(i)
152 for(thread = 0; thread < num_conv; thread++)
154 int level = (steps + thread) % num_levels;
155 int channel = (steps + thread) / num_levels;
156 int index = (steps + thread) % num_buffers;
157 const float *src = f + num_pixels * channel;
158 float *dest = buffers[index];
159 float L = min[channel] + level_step[channel]*level;
161 buffer_levels[index] = level;
162 buffer_channels[index] = channel;
164 for(i = 0; i < num_pixels; i++)
166 float diff = alpha * (L - src[i]);
167 dest[i] = (diff <= -1) ? -1 : ((diff >= 1) ? 1 : diff);
171 convolve(dest, omega_trans, num_pixels,
172 plans[index][0], plans[index][1]);
175 for(thread = num_conv; thread < num_threads; thread++)
176 buffer_channels[(steps + thread) % num_buffers] = -1;
180 #pragma omp parallel for schedule(dynamic) private(i,channel)
182 for(index = 0; index < num_buffers; index++)
184 int index_prev = (index) ? (index - 1) : num_threads;
186 const float *src, *buffer0, *buffer1;
189 if((channel = buffer_channels[index])
190 != buffer_channels[index_prev]
192 || buffer_levels[index_prev] + 1 != buffer_levels[index])
195 L0 = min[channel] + level_step[channel]*(buffer_levels[index]-1);
196 L1 = min[channel] + level_step[channel]*buffer_levels[index];
197 dest = u + num_pixels * channel;
198 src = f + num_pixels * channel;
199 buffer0 = buffers[index_prev];
200 buffer1 = buffers[index];
203 if(buffer_levels[index] < num_levels - 1)
205 for(i = 0; i < num_pixels; i++)
206 if(L0 <= src[i] && src[i] < L1)
207 dest[i] = buffer0[i] + (buffer1[i] - buffer0[i])
208 *(src[i] - L0)/level_step[channel];
211 for(i = 0; i < num_pixels; i++)
213 dest[i] = buffer0[i] + (buffer1[i] - buffer0[i])
214 *(src[i] - L0)/level_step[channel];
225 free(buffer_channels);
230 fftwf_free(omega_trans);
267 int width,
int height,
float alpha,
const char *omega_string,
int degree)
269 #ifndef DOXYGEN_SHOULD_SKIP_THIS
272 const float *slope_coeffs[5];
274 const int num_threads = omp_get_max_threads();
276 const int num_threads = 1;
278 const long num_pixels = ((long)width) * ((long)height);
279 const long pad_num_pixels = ((long)width + 1) * ((long)height + 1);
280 float **buffers = NULL, *omega_trans = NULL, *poly_coeffs = NULL;
281 fftwf_plan (*plans)[2] = NULL;
283 int iter, n, channel, success = 0;
285 slope_coeffs[0] = slope_coeff_3;
286 slope_coeffs[1] = slope_coeff_5;
287 slope_coeffs[2] = slope_coeff_7;
288 slope_coeffs[3] = slope_coeff_9;
289 slope_coeffs[4] = slope_coeff_11;
297 if(!(omega_trans = (
float *)fftwf_malloc(
sizeof(
float)*pad_num_pixels))
300 width, height, num_threads)
301 || !(poly_coeffs = (
float *)malloc(
302 sizeof(
float)*(degree + 1)*(degree + 1))))
307 int degree_index = (degree - 3) / 2;
308 int alpha_index = (int)(2*alpha + 0.5f) - 2;
312 else if(alpha_index > 14)
315 alpha_index *= (degree_index + 2);
318 for(n = 0; n <= degree; n++)
322 for(m = n + ((n % 2 == 0) ? 1 : 0); m <= degree; m += 2)
323 poly_coeffs[(degree + 1)*n + m] = (((m - n + 1) % 2) ? -1 : 1)
324 * slope_coeffs[degree_index][alpha_index + (m - 1)/2]
331 #pragma omp parallel for schedule(dynamic) private(i)
333 for(channel = 0; channel < 3; channel++)
335 const float *src = f + num_pixels * channel;
336 float *dest = u + num_pixels * channel;
338 for(i = 0; i < num_pixels; i++)
341 float TempSqr = Temp*Temp;
342 float a = poly_coeffs[degree];
346 a = a*TempSqr + poly_coeffs[m -= 2];
354 #pragma omp parallel for schedule(dynamic) private(i)
356 for(iter = 0; iter < 3*degree; iter++)
359 const int threadId = omp_get_thread_num();
361 const int threadId = 0;
363 int channel = iter / degree;
364 int n = 1 + (iter % degree);
365 const float *src = f + num_pixels * channel;
366 float *dest = u + num_pixels * channel;
367 float *Blurred = buffers[threadId];
370 int_pow(Blurred, src, num_pixels, n);
372 convolve(Blurred, omega_trans, num_pixels,
373 plans[threadId][0], plans[threadId][1]);
375 for(i = 0; i < num_pixels; i++)
378 float TempSqr = Temp*Temp;
379 float a = poly_coeffs[(degree + 1)*n + degree];
385 a = a*TempSqr + poly_coeffs[(degree + 1)*n + m];
394 dest[i] += a * Blurred[i];
400 for(i = 0; i < num_pixels; i++)
401 dest[i] += Blurred[i];
413 fftwf_free(omega_trans);
428 float *omega,
int width,
int height,
const char *omega_string)
430 fftwf_plan plan = NULL;
431 long pad_num_pixels = ((long)width + 1) * ((long)height + 1);
435 if(!omega_string || !strcmp(omega_string,
"1/r"))
436 for(y = 0, i = 0; y <= height; y++)
437 for(x = 0; x <= width; x++, i++)
438 omega[i] = (x == 0 && y == 0) ? 0
439 : 1.0f/sqrt(x*x + y*y);
440 else if(!strcmp(omega_string,
"1"))
441 for(y = 0, i = 0; y <= height; y++)
442 for(x = 0; x <= width; x++, i++)
444 else if(strlen(omega_string) >= 3
445 && omega_string[0] ==
'G' && omega_string[1] ==
':')
447 double sigma = atof(omega_string + 2);
452 for(y = 0, i = 0; y <= height; y++)
453 for(x = 0; x <= width; x++, i++)
454 omega[i] = exp(-(x*x + y*y)/(2*sigma*sigma));
459 for(y = 0, i = 0, sum = 0; y <= height; y++)
460 for(x = 0; x <= width; x++, i++)
461 sum += ((x == 0 || x == width) ? 1 : 2)
462 * ((y == 0 || y == height) ? 1 : 2)
465 sum *= 4*pad_num_pixels;
467 for(i = 0; i < pad_num_pixels; i++)
470 if(!(plan = fftwf_plan_r2r_2d(height + 1, width + 1, omega, omega_trans,
471 FFTW_REDFT00, FFTW_REDFT00, FFTW_ESTIMATE | FFTW_DESTROY_INPUT)))
475 fftwf_destroy_plan(plan);
478 for(y = 1, i = width; y < height; y++, i += width)
479 memmove(omega_trans + i, omega_trans + i + y,
sizeof(
float)*width);
492 const float *data,
size_t num_samples)
499 *min_ptr = *max_ptr = 0;
505 for(i = 1; i < num_samples; i++)
508 else if(max < data[i])
523 static void int_pow(
float *dest,
const float *src,
size_t num_samples,
int m)
530 memcpy(dest, src,
sizeof(
float)*num_samples);
533 for(i = 0; i < num_samples; i++)
540 for(i = 0; i < num_samples; i++)
547 for(i = 0; i < num_samples; i++)
555 for(i = 0; i < num_samples; i++)
559 dest[i] = x2 * x2 * x;
563 for(i = 0; i < num_samples; i++)
567 dest[i] = x2 * x2 * x2;
571 for(i = 0; i < num_samples; i++)
575 dest[i] = x2 * x2 * x2 * x;
579 for(i = 0; i < num_samples; i++)
588 for(i = 0; i < num_samples; i++)
593 dest[i] = x4 * x4 * x;
597 for(i = 0; i < num_samples; i++)
602 dest[i] = x4 * x4 * x2;
606 for(i = 0; i < num_samples; i++)
611 dest[i] = x4 * x4 * x2 * x;
615 for(i = 0; i < num_samples; i++)
616 dest[i] = (
float)pow(src[i], m);
629 static void stretch(
float *image,
long num_pixels)
634 #pragma omp parallel for schedule(dynamic)
636 for(channel = 0; channel < 3; channel++)
638 float *dest = image + channel * num_pixels;
639 float min, max, Scale;
645 for(n = 0, Scale = max - min; n < num_pixels; n++)
646 dest[n] = (dest[n] - min) / Scale;
651 static void convolve(
float *blurred_trans,
const float *omega_trans,
652 long num_pixels, fftwf_plan forward_plan, fftwf_plan inverse_plan)
656 fftwf_execute(forward_plan);
658 for(i = 0; i < num_pixels; i++)
659 blurred_trans[i] *= omega_trans[i];
661 fftwf_execute(inverse_plan);
666 fftwf_plan (**plans_ptr)[2],
int width,
int height,
int num_buffers)
668 const long num_pixels = ((long)width) * ((long)height);
670 fftwf_plan (*plans)[2];
673 if(!(*buffers_ptr = buffers = (
float **)
674 malloc(
sizeof(
float *)*num_buffers)))
677 for(i = 0; i < num_buffers; i++)
680 if(!(*plans_ptr = plans = (fftwf_plan (*)[2])
681 malloc(
sizeof(fftwf_plan)*2*num_buffers)))
684 for(i = 0; i < num_buffers; i++)
685 plans[i][0] = plans[i][1] = NULL;
689 for(i = 0; i < num_buffers; i++)
690 if(!(buffers[i] = (
float *)
691 fftwf_malloc(
sizeof(
float)*num_pixels))
693 || !(plans[i][0] = fftwf_plan_r2r_2d(height, width, buffers[i],
694 buffers[i], FFTW_REDFT10, FFTW_REDFT10, FFTW_ESTIMATE))
696 || !(plans[i][1] = fftwf_plan_r2r_2d(height, width, buffers[i],
697 buffers[i], FFTW_REDFT01, FFTW_REDFT01, FFTW_ESTIMATE)))
705 fftwf_plan (*plans)[2],
int num_buffers)
709 for(i = 0; i < num_buffers; i++)
712 fftwf_destroy_plan(plans[i][1]);
714 fftwf_destroy_plan(plans[i][0]);
716 fftwf_free(buffers[i]);
750 static const double table[15] = {1, 1, 2, 6, 24, 120, 720, 5040, 40320,
751 362880, 3628800, 39916800, 479001600, 6227020800, 87178291200};