Automatic Color Enhancement
ace.c
Go to the documentation of this file.
1 
20 #include <stdlib.h>
21 #include <string.h>
22 #include <math.h>
23 #include <fftw3.h>
24 #ifdef _OPENMP
25 #include <omp.h>
26 #endif
27 
28 int ace_enhance_image_interp(float *u, const float *f,
29  int width, int height, float alpha, const char *omega_string,
30  int num_levels);
31 int ace_enhance_image_poly(float *u, const float *f,
32  int width, int height, float alpha, const char *omega_string, int degree);
33 static int compute_omega_trans(float *omega_trans,
34  float *omega, int width, int height, const char *omega_string);
35 static void get_min_max(float *min_ptr, float *max_ptr,
36  const float *data, size_t num_samples);
37 static void int_pow(float *dest, const float *src, size_t num_samples, int m);
38 static void stretch(float *image, long num_pixels);
39 static void convolve(float *blurred_trans, const float *omega_trans,
40  long num_pixels, fftwf_plan forward_plan, fftwf_plan inverse_plan);
41 static int alloc_buffers_and_plans(float ***buffers_ptr,
42  fftwf_plan (**plans_ptr)[2], int width, int height, int num_buffers);
43 static void free_buffers_and_plans(float **buffers,
44  fftwf_plan (*plans)[2], int num_buffers);
45 static double binom_coeff(int m, int n);
46 static double factorial(int n);
47 
48 
78 int ace_enhance_image_interp(float *u, const float *f,
79  int width, int height, float alpha, const char *omega_string,
80  int num_levels)
81 {
82  const long num_pixels = ((long)width) * ((long)height);
83  const long pad_num_pixels = ((long)width + 1) * ((long)height + 1);
84 #ifdef _OPENMP
85  const int num_threads = omp_get_max_threads();
86 #else
87  const int num_threads = 1;
88 #endif
89  const int num_buffers = num_threads + 1;
90  fftwf_plan (*plans)[2] = NULL;
91  float **buffers = NULL, *omega_trans = NULL;
92  int *buffer_levels = NULL, *buffer_channels = NULL;
93  float min[3], max[3], level_step[3];
94  long i;
95  int thread, num_conv, index, steps, channel, success = 0;
96 
97  /* Allocate memory */
98  if(!(omega_trans = (float *)fftwf_malloc(sizeof(float)*pad_num_pixels))
99  || !compute_omega_trans(omega_trans, u, width, height, omega_string)
100  || !alloc_buffers_and_plans(&buffers, &plans,
101  width, height, num_buffers)
102  || !(buffer_levels = (int *)malloc(sizeof(int)*num_buffers))
103  || !(buffer_channels = (int *)malloc(sizeof(int)*num_buffers)))
104  goto fail;
105 
106  for(i = 0; i < num_pixels * 3; i++)
107  u[i] = -1.5;
108 
109  /* Find min and max of source image for each channel */
110 #ifdef _OPENMP
111 #pragma omp parallel for schedule(dynamic) private(i)
112 #endif
113  for(channel = 0; channel < 3; channel++)
114  {
115  get_min_max(&min[channel], &max[channel],
116  f + num_pixels * channel, num_pixels);
117 
118  if(min[channel] >= max[channel])
119  {
120  min[channel] = 0;
121  max[channel] = 1;
122  }
123 
124  level_step[channel] = (max[channel] - min[channel])
125  / (num_levels - 1);
126  }
127 
128  for(i = 0; i < num_buffers; i++)
129  buffer_levels[i] = buffer_channels[i] = -1;
130 
131  /* The following loop is the main computation. The ACE convolutions
132  *
133  * omega(x) * s_a(L - I(x))
134  *
135  * are computed for L = min[channel] + level_step[channel]*level, where
136  * level = 0, ..., num_levels-1. The R_L are then piecewise linearly
137  * interpolated to approximate R.
138  */
139  for(steps = 0; steps < 3*num_levels;)
140  {
141  if(steps + num_threads > 3*num_levels)
142  num_conv = 3*num_levels - steps;
143  else if(steps == 0 && num_threads == 1)
144  num_conv = 2;
145  else
146  num_conv = num_threads;
147 
148  /* Compute num_conv convolutions in parallel and store in buffers */
149 #ifdef _OPENMP
150 #pragma omp parallel for schedule(dynamic) private(i)
151 #endif
152  for(thread = 0; thread < num_conv; thread++)
153  {
154  int level = (steps + thread) % num_levels;
155  int channel = (steps + thread) / num_levels;
156  int index = (steps + thread) % num_buffers;
157  const float *src = f + num_pixels * channel;
158  float *dest = buffers[index];
159  float L = min[channel] + level_step[channel]*level;
160 
161  buffer_levels[index] = level;
162  buffer_channels[index] = channel;
163 
164  for(i = 0; i < num_pixels; i++)
165  {
166  float diff = alpha * (L - src[i]);
167  dest[i] = (diff <= -1) ? -1 : ((diff >= 1) ? 1 : diff);
168  }
169 
170  /* Compute buffers[index] = omega(x) * s_a(L - I(x)) */
171  convolve(dest, omega_trans, num_pixels,
172  plans[index][0], plans[index][1]);
173  }
174 
175  for(thread = num_conv; thread < num_threads; thread++)
176  buffer_channels[(steps + thread) % num_buffers] = -1;
177 
178  /* Interpolate between the levels */
179 #ifdef _OPENMP
180 #pragma omp parallel for schedule(dynamic) private(i,channel)
181 #endif
182  for(index = 0; index < num_buffers; index++)
183  {
184  int index_prev = (index) ? (index - 1) : num_threads;
185  float *dest;
186  const float *src, *buffer0, *buffer1;
187  float L0, L1;
188 
189  if((channel = buffer_channels[index])
190  != buffer_channels[index_prev]
191  || channel < 0
192  || buffer_levels[index_prev] + 1 != buffer_levels[index])
193  continue; /* Skip buffers that are not adjacent levels */
194 
195  L0 = min[channel] + level_step[channel]*(buffer_levels[index]-1);
196  L1 = min[channel] + level_step[channel]*buffer_levels[index];
197  dest = u + num_pixels * channel;
198  src = f + num_pixels * channel;
199  buffer0 = buffers[index_prev];
200  buffer1 = buffers[index];
201 
202  /* Piecewise linear interpolation for the interval [L0, L1) */
203  if(buffer_levels[index] < num_levels - 1)
204  {
205  for(i = 0; i < num_pixels; i++)
206  if(L0 <= src[i] && src[i] < L1)
207  dest[i] = buffer0[i] + (buffer1[i] - buffer0[i])
208  *(src[i] - L0)/level_step[channel];
209  }
210  else /* Rightmost interval */
211  for(i = 0; i < num_pixels; i++)
212  if(L0 <= src[i])
213  dest[i] = buffer0[i] + (buffer1[i] - buffer0[i])
214  *(src[i] - L0)/level_step[channel];
215  }
216 
217  steps += num_conv;
218  }
219 
220  stretch(u, num_pixels);
221  success = 1;
222 fail:
223  /* Free memory */
224  if(buffer_channels)
225  free(buffer_channels);
226  if(buffer_levels)
227  free(buffer_levels);
228  free_buffers_and_plans(buffers, plans, num_buffers);
229  if(omega_trans)
230  fftwf_free(omega_trans);
231  fftwf_cleanup();
232  return success;
233 }
234 
235 
266 int ace_enhance_image_poly(float *u, const float *f,
267  int width, int height, float alpha, const char *omega_string, int degree)
268 {
269 #ifndef DOXYGEN_SHOULD_SKIP_THIS
270 #include "slopecoeff_inc.c"
271 #endif
272  const float *slope_coeffs[5];
273 #ifdef _OPENMP
274  const int num_threads = omp_get_max_threads();
275 #else
276  const int num_threads = 1;
277 #endif
278  const long num_pixels = ((long)width) * ((long)height);
279  const long pad_num_pixels = ((long)width + 1) * ((long)height + 1);
280  float **buffers = NULL, *omega_trans = NULL, *poly_coeffs = NULL;
281  fftwf_plan (*plans)[2] = NULL;
282  long i;
283  int iter, n, channel, success = 0;
284 
285  slope_coeffs[0] = slope_coeff_3;
286  slope_coeffs[1] = slope_coeff_5;
287  slope_coeffs[2] = slope_coeff_7;
288  slope_coeffs[3] = slope_coeff_9;
289  slope_coeffs[4] = slope_coeff_11;
290 
291  if(degree < 3)
292  degree = 3;
293  else if(degree > 11)
294  degree = 11;
295 
296  /* Allocate memory */
297  if(!(omega_trans = (float *)fftwf_malloc(sizeof(float)*pad_num_pixels))
298  || !compute_omega_trans(omega_trans, u, width, height, omega_string)
299  || !alloc_buffers_and_plans(&buffers, &plans,
300  width, height, num_threads)
301  || !(poly_coeffs = (float *)malloc(
302  sizeof(float)*(degree + 1)*(degree + 1))))
303  goto fail;
304 
305  /* Select polynomial from SlopeCoeff table */
306  {
307  int degree_index = (degree - 3) / 2;
308  int alpha_index = (int)(2*alpha + 0.5f) - 2;
309 
310  if(alpha_index < 0)
311  alpha_index = 0;
312  else if(alpha_index > 14)
313  alpha_index = 14;
314 
315  alpha_index *= (degree_index + 2);
316 
317  /* Precompute coefficients */
318  for(n = 0; n <= degree; n++)
319  {
320  int m;
321 
322  for(m = n + ((n % 2 == 0) ? 1 : 0); m <= degree; m += 2)
323  poly_coeffs[(degree + 1)*n + m] = (((m - n + 1) % 2) ? -1 : 1)
324  * slope_coeffs[degree_index][alpha_index + (m - 1)/2]
325  * binom_coeff(m, n);
326  }
327  }
328 
329  /* Special case for n = zero term */
330 #ifdef _OPENMP
331 #pragma omp parallel for schedule(dynamic) private(i)
332 #endif
333  for(channel = 0; channel < 3; channel++)
334  {
335  const float *src = f + num_pixels * channel;
336  float *dest = u + num_pixels * channel;
337 
338  for(i = 0; i < num_pixels; i++)
339  {
340  float Temp = src[i];
341  float TempSqr = Temp*Temp;
342  float a = poly_coeffs[degree];
343  int m = degree;
344 
345  while(m >= 2)
346  a = a*TempSqr + poly_coeffs[m -= 2];
347 
348  dest[i] = a * Temp;
349  }
350  }
351 
352  /* Most of the computation time is spent in this loop. */
353 #ifdef _OPENMP
354 #pragma omp parallel for schedule(dynamic) private(i)
355 #endif
356  for(iter = 0; iter < 3*degree; iter++)
357  {
358 #ifdef _OPENMP
359  const int threadId = omp_get_thread_num();
360 #else
361  const int threadId = 0;
362 #endif
363  int channel = iter / degree; /* = current image channel */
364  int n = 1 + (iter % degree); /* = current summation term */
365  const float *src = f + num_pixels * channel;
366  float *dest = u + num_pixels * channel;
367  float *Blurred = buffers[threadId];
368 
369  /* Compute Blurred = src to the nth power */
370  int_pow(Blurred, src, num_pixels, n);
371  /* convolve Blurred with omega */
372  convolve(Blurred, omega_trans, num_pixels,
373  plans[threadId][0], plans[threadId][1]);
374 
375  for(i = 0; i < num_pixels; i++)
376  {
377  float Temp = src[i];
378  float TempSqr = Temp*Temp;
379  float a = poly_coeffs[(degree + 1)*n + degree];
380  int m = degree;
381 
382  while(m - n >= 2)
383  {
384  m -= 2;
385  a = a*TempSqr + poly_coeffs[(degree + 1)*n + m];
386  }
387 
388  if(n % 2 == 0)
389  a *= Temp;
390 
391 #ifdef _OPENMP
392  Blurred[i] *= a;
393 #else
394  dest[i] += a * Blurred[i];
395 #endif
396  }
397 
398 #ifdef _OPENMP
399 #pragma omp critical
400  for(i = 0; i < num_pixels; i++)
401  dest[i] += Blurred[i];
402 #endif
403  }
404 
405  stretch(u, num_pixels);
406  success = 1;
407 fail:
408  /* Free memory */
409  if(poly_coeffs)
410  free(poly_coeffs);
411  free_buffers_and_plans(buffers, plans, num_threads);
412  if(omega_trans)
413  fftwf_free(omega_trans);
414  fftwf_cleanup();
415  return success;
416 }
417 
418 
427 static int compute_omega_trans(float *omega_trans,
428  float *omega, int width, int height, const char *omega_string)
429 {
430  fftwf_plan plan = NULL;
431  long pad_num_pixels = ((long)width + 1) * ((long)height + 1);
432  double sum;
433  long i, x, y;
434 
435  if(!omega_string || !strcmp(omega_string, "1/r"))
436  for(y = 0, i = 0; y <= height; y++) /* omega = 1/sqrt(x^2 + y^2) */
437  for(x = 0; x <= width; x++, i++)
438  omega[i] = (x == 0 && y == 0) ? 0
439  : 1.0f/sqrt(x*x + y*y);
440  else if(!strcmp(omega_string, "1"))
441  for(y = 0, i = 0; y <= height; y++) /* omega = 1*/
442  for(x = 0; x <= width; x++, i++)
443  omega[i] = 1.0f;
444  else if(strlen(omega_string) >= 3
445  && omega_string[0] == 'G' && omega_string[1] == ':')
446  {
447  double sigma = atof(omega_string + 2);
448 
449  if(sigma <= 0)
450  return 0;
451 
452  for(y = 0, i = 0; y <= height; y++) /* omega = Gaussian */
453  for(x = 0; x <= width; x++, i++)
454  omega[i] = exp(-(x*x + y*y)/(2*sigma*sigma));
455  }
456  else
457  return 0;
458 
459  for(y = 0, i = 0, sum = 0; y <= height; y++)
460  for(x = 0; x <= width; x++, i++)
461  sum += ((x == 0 || x == width) ? 1 : 2)
462  * ((y == 0 || y == height) ? 1 : 2)
463  * omega[i];
464 
465  sum *= 4*pad_num_pixels;
466 
467  for(i = 0; i < pad_num_pixels; i++)
468  omega[i] /= sum;
469 
470  if(!(plan = fftwf_plan_r2r_2d(height + 1, width + 1, omega, omega_trans,
471  FFTW_REDFT00, FFTW_REDFT00, FFTW_ESTIMATE | FFTW_DESTROY_INPUT)))
472  return 0;
473 
474  fftwf_execute(plan);
475  fftwf_destroy_plan(plan);
476 
477  /* Cut last row and column from KernelTrans */
478  for(y = 1, i = width; y < height; y++, i += width)
479  memmove(omega_trans + i, omega_trans + i + y, sizeof(float)*width);
480 
481  return 1;
482 }
483 
491 static void get_min_max(float *min_ptr, float *max_ptr,
492  const float *data, size_t num_samples)
493 {
494  float min, max;
495  size_t i;
496 
497  if(num_samples <= 0)
498  {
499  *min_ptr = *max_ptr = 0;
500  return;
501  }
502 
503  min = max = data[0];
504 
505  for(i = 1; i < num_samples; i++)
506  if(min > data[i])
507  min = data[i];
508  else if(max < data[i])
509  max = data[i];
510 
511  *min_ptr = min;
512  *max_ptr = max;
513 }
514 
515 
523 static void int_pow(float *dest, const float *src, size_t num_samples, int m)
524 {
525  size_t i;
526 
527  switch(m)
528  {
529  case 1:
530  memcpy(dest, src, sizeof(float)*num_samples);
531  break;
532  case 2:
533  for(i = 0; i < num_samples; i++)
534  {
535  float x = src[i];
536  dest[i] = x * x;
537  }
538  break;
539  case 3:
540  for(i = 0; i < num_samples; i++)
541  {
542  float x = src[i];
543  dest[i] = x * x * x;
544  }
545  break;
546  case 4:
547  for(i = 0; i < num_samples; i++)
548  {
549  float x = src[i];
550  float x2 = x * x;
551  dest[i] = x2 * x2;
552  }
553  break;
554  case 5:
555  for(i = 0; i < num_samples; i++)
556  {
557  float x = src[i];
558  float x2 = x * x;
559  dest[i] = x2 * x2 * x;
560  }
561  break;
562  case 6:
563  for(i = 0; i < num_samples; i++)
564  {
565  float x = src[i];
566  float x2 = x * x;
567  dest[i] = x2 * x2 * x2;
568  }
569  break;
570  case 7:
571  for(i = 0; i < num_samples; i++)
572  {
573  float x = src[i];
574  float x2 = x * x;
575  dest[i] = x2 * x2 * x2 * x;
576  }
577  break;
578  case 8:
579  for(i = 0; i < num_samples; i++)
580  {
581  float x = src[i];
582  float x2 = x * x;
583  float x4 = x2 * x2;
584  dest[i] = x4 * x4;
585  }
586  break;
587  case 9:
588  for(i = 0; i < num_samples; i++)
589  {
590  float x = src[i];
591  float x2 = x * x;
592  float x4 = x2 * x2;
593  dest[i] = x4 * x4 * x;
594  }
595  break;
596  case 10:
597  for(i = 0; i < num_samples; i++)
598  {
599  float x = src[i];
600  float x2 = x * x;
601  float x4 = x2 * x2;
602  dest[i] = x4 * x4 * x2;
603  }
604  break;
605  case 11:
606  for(i = 0; i < num_samples; i++)
607  {
608  float x = src[i];
609  float x2 = x * x;
610  float x4 = x2 * x2;
611  dest[i] = x4 * x4 * x2 * x;
612  }
613  break;
614  default:
615  for(i = 0; i < num_samples; i++)
616  dest[i] = (float)pow(src[i], m);
617  break;
618  }
619 }
620 
629 static void stretch(float *image, long num_pixels)
630 {
631  int channel;
632 
633 #ifdef _OPENMP
634 #pragma omp parallel for schedule(dynamic)
635 #endif
636  for(channel = 0; channel < 3; channel++)
637  {
638  float *dest = image + channel * num_pixels;
639  float min, max, Scale;
640  long n;
641 
642  get_min_max(&min, &max, dest, num_pixels);
643 
644  if(min < max)
645  for(n = 0, Scale = max - min; n < num_pixels; n++)
646  dest[n] = (dest[n] - min) / Scale;
647  }
648 }
649 
651 static void convolve(float *blurred_trans, const float *omega_trans,
652  long num_pixels, fftwf_plan forward_plan, fftwf_plan inverse_plan)
653 {
654  long i;
655 
656  fftwf_execute(forward_plan);
657 
658  for(i = 0; i < num_pixels; i++)
659  blurred_trans[i] *= omega_trans[i];
660 
661  fftwf_execute(inverse_plan);
662 }
663 
665 static int alloc_buffers_and_plans(float ***buffers_ptr,
666  fftwf_plan (**plans_ptr)[2], int width, int height, int num_buffers)
667 {
668  const long num_pixels = ((long)width) * ((long)height);
669  float **buffers;
670  fftwf_plan (*plans)[2];
671  int i;
672 
673  if(!(*buffers_ptr = buffers = (float **)
674  malloc(sizeof(float *)*num_buffers)))
675  return 0;
676 
677  for(i = 0; i < num_buffers; i++)
678  buffers[i] = NULL;
679 
680  if(!(*plans_ptr = plans = (fftwf_plan (*)[2])
681  malloc(sizeof(fftwf_plan)*2*num_buffers)))
682  return 0;
683 
684  for(i = 0; i < num_buffers; i++)
685  plans[i][0] = plans[i][1] = NULL;
686 
687  /* Allocate a workspace array and create FFTW transform plans. These will
688  be used for computing convolutions in parallel. */
689  for(i = 0; i < num_buffers; i++)
690  if(!(buffers[i] = (float *)
691  fftwf_malloc(sizeof(float)*num_pixels))
692  /* DCT-II on buffers[i] */
693  || !(plans[i][0] = fftwf_plan_r2r_2d(height, width, buffers[i],
694  buffers[i], FFTW_REDFT10, FFTW_REDFT10, FFTW_ESTIMATE))
695  /* DCT-III on buffers[i] */
696  || !(plans[i][1] = fftwf_plan_r2r_2d(height, width, buffers[i],
697  buffers[i], FFTW_REDFT01, FFTW_REDFT01, FFTW_ESTIMATE)))
698  return 0;
699 
700  return 1;
701 }
702 
704 static void free_buffers_and_plans(float **buffers,
705  fftwf_plan (*plans)[2], int num_buffers)
706 {
707  int i;
708 
709  for(i = 0; i < num_buffers; i++)
710  {
711  if(plans[i][1])
712  fftwf_destroy_plan(plans[i][1]);
713  if(plans[i][0])
714  fftwf_destroy_plan(plans[i][0]);
715  if(buffers[i])
716  fftwf_free(buffers[i]);
717  }
718 
719  if(plans)
720  free(plans);
721  if(buffers)
722  free(buffers);
723 }
724 
734 static double binom_coeff(int m, int n)
735 {
736  return factorial(m) / ( factorial(n) * factorial(m - n));
737 }
738 
748 static double factorial(int n)
749 {
750  static const double table[15] = {1, 1, 2, 6, 24, 120, 720, 5040, 40320,
751  362880, 3628800, 39916800, 479001600, 6227020800, 87178291200};
752 
753  if(n < 0)
754  return 0;
755  else if(n < 15)
756  return table[n];
757  else
758  return n * factorial(n - 1); /* Recursion for general n */
759 }