A Survey of Gaussian Convolution Algorithms
gaussian_conv_deriche.c
Go to the documentation of this file.
1 
20 #include "gaussian_conv_deriche.h"
21 #include <assert.h>
22 #include <math.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include "filter_util.h"
26 #include "complex_arith.h"
27 #include "gaussian_short_conv.h"
28 
29 #ifndef M_SQRT2PI
30 
31 #define M_SQRT2PI 2.50662827463100050241576528481104525
32 #endif
33 
34 static void make_filter(num *result_b, num *result_a,
35  const complex *alpha, const complex *beta, int K, double sigma);
36 
62 void deriche_precomp(deriche_coeffs *c, double sigma, int K, num tol)
63 {
64  /* Deriche's optimized filter parameters. */
65  static const complex alpha[DERICHE_MAX_K - DERICHE_MIN_K + 1][4] = {
66  {{0.48145, 0.971}, {0.48145, -0.971}},
67  {{-0.44645, 0.5105}, {-0.44645, -0.5105}, {1.898, 0}},
68  {{0.84, 1.8675}, {0.84, -1.8675},
69  {-0.34015, -0.1299}, {-0.34015, 0.1299}}
70  };
71  static const complex lambda[DERICHE_MAX_K - DERICHE_MIN_K + 1][4] = {
72  {{1.26, 0.8448}, {1.26, -0.8448}},
73  {{1.512, 1.475}, {1.512, -1.475}, {1.556, 0}},
74  {{1.783, 0.6318}, {1.783, -0.6318},
75  {1.723, 1.997}, {1.723, -1.997}}
76  };
77  complex beta[DERICHE_MAX_K];
78 
79  int k;
80  double accum, accum_denom = 1.0;
81 
82  assert(c && sigma > 0 && DERICHE_VALID_K(K) && 0 < tol && tol < 1);
83 
84  for (k = 0; k < K; ++k)
85  {
86  double temp = exp(-lambda[K - DERICHE_MIN_K][k].real / sigma);
87  beta[k] = make_complex(
88  -temp * cos(lambda[K - DERICHE_MIN_K][k].imag / sigma),
89  temp * sin(lambda[K - DERICHE_MIN_K][k].imag / sigma));
90  }
91 
92  /* Compute the causal filter coefficients */
93  make_filter(c->b_causal, c->a, alpha[K - DERICHE_MIN_K], beta, K, sigma);
94 
95  /* Numerator coefficients of the anticausal filter */
96  c->b_anticausal[0] = (num)(0.0);
97 
98  for (k = 1; k < K; ++k)
99  c->b_anticausal[k] = c->b_causal[k] - c->a[k] * c->b_causal[0];
100 
101  c->b_anticausal[K] = -c->a[K] * c->b_causal[0];
102 
103  /* Impulse response sums */
104  for (k = 1; k <= K; ++k)
105  accum_denom += c->a[k];
106 
107  for (k = 0, accum = 0.0; k < K; ++k)
108  accum += c->b_causal[k];
109 
110  c->sum_causal = (num)(accum / accum_denom);
111 
112  for (k = 1, accum = 0.0; k <= K; ++k)
113  accum += c->b_anticausal[k];
114 
115  c->sum_anticausal = (num)(accum / accum_denom);
116 
117  c->sigma = (num)sigma;
118  c->K = K;
119  c->tol = tol;
120  c->max_iter = (num)ceil(10 * sigma);
121  return;
122 }
123 
145 static void make_filter(num *result_b, num *result_a,
146  const complex *alpha, const complex *beta, int K, double sigma)
147 {
148  const double denom = sigma * M_SQRT2PI;
150  int k, j;
151 
152  b[0] = alpha[0]; /* Initialize b/a = alpha[0] / (1 + beta[0] z^-1) */
153  a[0] = make_complex(1, 0);
154  a[1] = beta[0];
155 
156  for (k = 1; k < K; ++k)
157  { /* Add kth term, b/a += alpha[k] / (1 + beta[k] z^-1) */
158  b[k] = c_mul(beta[k], b[k-1]);
159 
160  for (j = k - 1; j > 0; --j)
161  b[j] = c_add(b[j], c_mul(beta[k], b[j - 1]));
162 
163  for (j = 0; j <= k; ++j)
164  b[j] = c_add(b[j], c_mul(alpha[k], a[j]));
165 
166  a[k + 1] = c_mul(beta[k], a[k]);
167 
168  for (j = k; j > 0; --j)
169  a[j] = c_add(a[j], c_mul(beta[k], a[j - 1]));
170  }
171 
172  for (k = 0; k < K; ++k)
173  {
174  result_b[k] = (num)(b[k].real / denom);
175  result_a[k + 1] = (num)a[k + 1].real;
176  }
177 
178  return;
179 }
180 
212  num *dest, num *buffer, const num *src, long N, long stride)
213 {
214  const long stride_2 = stride * 2;
215  const long stride_3 = stride * 3;
216  const long stride_4 = stride * 4;
217  const long stride_N = stride * N;
218  num *y_causal, *y_anticausal;
219  long i, n;
220 
221  assert(dest && buffer && src && buffer != src && N > 0 && stride != 0);
222 
223  if (N <= 4)
224  { /* Special case for very short signals. */
225  gaussian_short_conv(dest, src, N, stride, c.sigma);
226  return;
227  }
228 
229  /* Divide buffer into two buffers each of length N. */
230  y_causal = buffer;
231  y_anticausal = buffer + N;
232 
233  /* Initialize the causal filter on the left boundary. */
234  init_recursive_filter(y_causal, src, N, stride,
235  c.b_causal, c.K - 1, c.a, c.K, c.sum_causal, c.tol, c.max_iter);
236 
237  /* The following filters the interior samples according to the filter
238  order c.K. The loops below implement the pseudocode
239 
240  For n = K, ..., N - 1,
241  y^+(n) = \sum_{k=0}^{K-1} b^+_k src(n - k)
242  - \sum_{k=1}^K a_k y^+(n - k)
243 
244  Variable i tracks the offset to the nth sample of src, it is
245  updated together with n such that i = stride * n. */
246  switch (c.K)
247  {
248  case 2:
249  for (n = 2, i = stride_2; n < N; ++n, i += stride)
250  y_causal[n] = c.b_causal[0] * src[i]
251  + c.b_causal[1] * src[i - stride]
252  - c.a[1] * y_causal[n - 1]
253  - c.a[2] * y_causal[n - 2];
254  break;
255  case 3:
256  for (n = 3, i = stride_3; n < N; ++n, i += stride)
257  y_causal[n] = c.b_causal[0] * src[i]
258  + c.b_causal[1] * src[i - stride]
259  + c.b_causal[2] * src[i - stride_2]
260  - c.a[1] * y_causal[n - 1]
261  - c.a[2] * y_causal[n - 2]
262  - c.a[3] * y_causal[n - 3];
263  break;
264  case 4:
265  for (n = 4, i = stride_4; n < N; ++n, i += stride)
266  y_causal[n] = c.b_causal[0] * src[i]
267  + c.b_causal[1] * src[i - stride]
268  + c.b_causal[2] * src[i - stride_2]
269  + c.b_causal[3] * src[i - stride_3]
270  - c.a[1] * y_causal[n - 1]
271  - c.a[2] * y_causal[n - 2]
272  - c.a[3] * y_causal[n - 3]
273  - c.a[4] * y_causal[n - 4];
274  break;
275  }
276 
277  /* Initialize the anticausal filter on the right boundary. */
278  init_recursive_filter(y_anticausal, src + stride_N - stride, N, -stride,
279  c.b_anticausal, c.K, c.a, c.K, c.sum_anticausal, c.tol, c.max_iter);
280 
281  /* Similar to the causal filter code above, the following implements
282  the pseudocode
283 
284  For n = K, ..., N - 1,
285  y^-(n) = \sum_{k=1}^K b^-_k src(N - n - 1 - k)
286  - \sum_{k=1}^K a_k y^-(n - k)
287 
288  Variable i is updated such that i = stride * (N - n - 1). */
289  switch (c.K)
290  {
291  case 2:
292  for (n = 2, i = stride_N - stride_3; n < N; ++n, i -= stride)
293  y_anticausal[n] = c.b_anticausal[1] * src[i + stride]
294  + c.b_anticausal[2] * src[i + stride_2]
295  - c.a[1] * y_anticausal[n - 1]
296  - c.a[2] * y_anticausal[n - 2];
297  break;
298  case 3:
299  for (n = 3, i = stride_N - stride_4; n < N; ++n, i -= stride)
300  y_anticausal[n] = c.b_anticausal[1] * src[i + stride]
301  + c.b_anticausal[2] * src[i + stride_2]
302  + c.b_anticausal[3] * src[i + stride_3]
303  - c.a[1] * y_anticausal[n - 1]
304  - c.a[2] * y_anticausal[n - 2]
305  - c.a[3] * y_anticausal[n - 3];
306  break;
307  case 4:
308  for (n = 4, i = stride_N - stride * 5; n < N; ++n, i -= stride)
309  y_anticausal[n] = c.b_anticausal[1] * src[i + stride]
310  + c.b_anticausal[2] * src[i + stride_2]
311  + c.b_anticausal[3] * src[i + stride_3]
312  + c.b_anticausal[4] * src[i + stride_4]
313  - c.a[1] * y_anticausal[n - 1]
314  - c.a[2] * y_anticausal[n - 2]
315  - c.a[3] * y_anticausal[n - 3]
316  - c.a[4] * y_anticausal[n - 4];
317  break;
318  }
319 
320  /* Sum the causal and anticausal responses to obtain the final result. */
321  for (n = 0, i = 0; n < N; ++n, i += stride)
322  dest[i] = y_causal[n] + y_anticausal[N - n - 1];
323 
324  return;
325 }
326 
348  num *dest, num *buffer, const num *src,
349  int width, int height, int num_channels)
350 {
351  long num_pixels = ((long)width) * ((long)height);
352  int x, y, channel;
353 
354  assert(dest && buffer && src && num_pixels > 0);
355 
356  /* Loop over the image channels. */
357  for (channel = 0; channel < num_channels; ++channel)
358  {
359  num *dest_y = dest;
360  const num *src_y = src;
361 
362  /* Filter each row of the channel. */
363  for (y = 0; y < height; ++y)
364  {
366  dest_y, buffer, src_y, width, 1);
367  dest_y += width;
368  src_y += width;
369  }
370 
371  /* Filter each column of the channel. */
372  for (x = 0; x < width; ++x)
374  dest + x, buffer, dest + x, height, width);
375 
376  dest += num_pixels;
377  src += num_pixels;
378  }
379 
380  return;
381 }