A Survey of Gaussian Convolution Algorithms
gaussian_conv_vyv.c
Go to the documentation of this file.
1 
20 #include <assert.h>
21 #include <math.h>
22 #include <stdlib.h>
23 #include "filter_util.h"
24 #include "complex_arith.h"
25 #include "invert_matrix.h"
26 #include "gaussian_short_conv.h"
27 #include "gaussian_conv_vyv.h"
28 
30 #define YVY_NUM_NEWTON_ITERATIONS 6
31 
40 static double variance(const complex *poles0, int K, double q)
41 {
42  complex sum = {0, 0};
43  int k;
44 
45  for (k = 0; k < K; ++k)
46  {
47  complex z = c_real_pow(poles0[k], 1/q), denom = z;
48  denom.real -= 1;
49  /* Compute sum += z / (z - 1)^2. */
50  sum = c_add(sum, c_div(z, c_mul(denom, denom)));
51  }
52 
53  return 2 * sum.real;
54 }
55 
66 static double dq_variance(const complex *poles0, int K, double q)
67 {
68  complex sum = {0, 0};
69  int k;
70 
71  for (k = 0; k < K; ++k)
72  {
73  complex z = c_real_pow(poles0[k], 1/q), w = z, denom = z;
74  w.real += 1;
75  denom.real -= 1;
76  /* Compute sum += z log(z) (z + 1) / (z - 1)^3 */
77  sum = c_add(sum, c_div(c_mul(c_mul(z, c_log(z)), w),
78  c_real_pow(denom, 3)));
79  }
80 
81  return (2 / q) * sum.real;
82 }
83 
99 static double compute_q(const complex *poles0, int K,
100  double sigma, double q0)
101 {
102  double sigma2 = sigma * sigma;
103  double q = q0;
104  int i;
105 
106  for (i = 0; i < YVY_NUM_NEWTON_ITERATIONS; ++i)
107  q -= (variance(poles0, K, q) - sigma2)
108  / dq_variance(poles0, K, q);
109 
110  return q;
111 }
112 
124 static void expand_pole_product(double *c, const complex *poles, int K)
125 {
126  complex denom[VYV_MAX_K + 1];
127  int k, j;
128 
129  assert(K <= VYV_MAX_K);
130  denom[0] = poles[0];
131  denom[1] = make_complex(-1, 0);
132 
133  for (k = 1; k < K; ++k)
134  {
135  denom[k + 1] = c_neg(denom[k]);
136 
137  for (j = k; j > 0; --j)
138  denom[j] = c_sub(c_mul(denom[j], poles[k]), denom[j - 1]);
139 
140  denom[0] = c_mul(denom[0], poles[k]);
141  }
142 
143  for (k = 1; k <= K; ++k)
144  c[k] = c_div(denom[k], denom[0]).real;
145 
146  for (c[0] = 1, k = 1; k <= K; ++k)
147  c[0] += c[k];
148 
149  return;
150 }
151 
181 void vyv_precomp(vyv_coeffs *c, double sigma, int K, num tol)
182 {
183  /* Optimized unscaled pole locations. */
184  static const complex poles0[VYV_MAX_K - VYV_MIN_K + 1][5] = {
185  {{1.4165, 1.00829}, {1.4165, -1.00829}, {1.86543, 0}},
186  {{1.13228, 1.28114}, {1.13228, -1.28114},
187  {1.78534, 0.46763}, {1.78534, -0.46763}},
188  {{0.8643, 1.45389}, {0.8643, -1.45389},
189  {1.61433, 0.83134}, {1.61433, -0.83134}, {1.87504, 0}}
190  };
191  complex poles[VYV_MAX_K];
192  double q, filter[VYV_MAX_K + 1];
193  double A[VYV_MAX_K * VYV_MAX_K], inv_A[VYV_MAX_K * VYV_MAX_K];
194  int i, j, matrix_size;
195 
196  assert(c && sigma > 0 && VYV_VALID_K(K) && tol > 0);
197 
198  /* Make a crude initial estimate of q. */
199  q = sigma / 2;
200  /* Compute an accurate value of q using Newton's method. */
201  q = compute_q(poles0[K - VYV_MIN_K], K, sigma, q);
202 
203  for (i = 0; i < K; ++i)
204  poles[i] = c_real_pow(poles0[K - VYV_MIN_K][i], 1/q);
205 
206  /* Compute the filter coefficients b_0, a_1, ..., a_K. */
207  expand_pole_product(filter, poles, K);
208 
209  /* Compute matrix for handling the right boundary. */
210  for (i = 0, matrix_size = K * K; i < matrix_size; ++i)
211  A[i] = 0.0;
212 
213  for (i = 0; i < K; ++i)
214  for (A[i + K * i] = 1.0, j = 1; j <= K; ++j)
215  A[i + K * extension(K, i + j)] += filter[j];
216 
217  invert_matrix(inv_A, A, K);
218 
219  for (i = 0; i < matrix_size; ++i)
220  inv_A[i] *= filter[0];
221 
222  /* Store precomputations in coeffs struct. */
223  for (i = 0; i <= K; ++i)
224  c->filter[i] = filter[i];
225 
226  for (i = 0; i < matrix_size; ++i)
227  c->M[i] = (num)inv_A[i];
228 
229  c->K = K;
230  c->sigma = (num)sigma;
231  c->tol = tol;
232  c->max_iter = (num)(10 * sigma);
233  return;
234 }
235 
260  num *dest, const num *src, long N, long stride)
261 {
262  const long stride_2 = stride * 2;
263  const long stride_3 = stride * 3;
264  const long stride_4 = stride * 4;
265  const long stride_5 = stride * 5;
266  const long stride_N = stride * N;
267  num q[VYV_MAX_K];
268  long i;
269  int m, n;
270 
271  assert(dest && src && N > 0 && stride != 0);
272 
273  if (N <= 4)
274  { /* Special case for very short signals. */
275  gaussian_short_conv(dest, src, N, stride, c.sigma);
276  return;
277  }
278 
279  /* Handle the left boundary. */
280  init_recursive_filter(q, src, N, stride,
281  c.filter, 0, c.filter, c.K, 1.0f, c.tol, c.max_iter);
282 
283  for (m = 0; m < c.K; ++m)
284  dest[stride * m] = q[m];
285 
286  /* The following applies the causal recursive filter according to the
287  filter order c.K. The loops implement the pseudocode
288 
289  For n = K, ..., N - 1,
290  dest(n) = filter(0) src(n) - \sum_{k=1}^K dest(n - k)
291 
292  Variable i = stride * n is the offset to the nth sample. */
293 
294  switch (c.K)
295  {
296  case 3:
297  for (i = stride_3; i < stride_N; i += stride)
298  dest[i] = c.filter[0] * src[i]
299  - c.filter[1] * dest[i - stride]
300  - c.filter[2] * dest[i - stride_2]
301  - c.filter[3] * dest[i - stride_3];
302  break;
303  case 4:
304  for (i = stride_4; i < stride_N; i += stride)
305  dest[i] = c.filter[0] * src[i]
306  - c.filter[1] * dest[i - stride]
307  - c.filter[2] * dest[i - stride_2]
308  - c.filter[3] * dest[i - stride_3]
309  - c.filter[4] * dest[i - stride_4];
310  break;
311  case 5:
312  for (i = stride_5; i < stride_N; i += stride)
313  dest[i] = c.filter[0] * src[i]
314  - c.filter[1] * dest[i - stride]
315  - c.filter[2] * dest[i - stride_2]
316  - c.filter[3] * dest[i - stride_3]
317  - c.filter[4] * dest[i - stride_4]
318  - c.filter[5] * dest[i - stride_5];
319  break;
320  }
321 
322  /* Handle the right boundary by multiplying matrix c.M with last K
323  samples dest(N - K - m), m = 0, ..., K - 1. */
324 
325  /* Copy last K samples into array q, q(m) = dest(N - K + m). */
326  for (m = 0; m < c.K; ++m)
327  q[m] = dest[stride_N - stride * (c.K - m)];
328 
329  /* Perform matrix multiplication,
330  dest(N - K + m) = \sum_{n=0}^{K-1} M(m, n) q(n). */
331  for (m = 0; m < c.K; ++m)
332  {
333  num accum = (num)0;
334 
335  for (n = 0; n < c.K; ++n)
336  accum += c.M[m + c.K*n] * q[n];
337 
338  dest[stride_N - stride * (c.K - m)] = accum;
339  }
340 
341  /* The following applies the anticausal filter, implementing the
342  pseudocode
343 
344  For n = N - K - 1, ..., 0,
345  dest(n) = filter(0) dest(n) - \sum_{k=1}^K dest(n + k)
346 
347  Variable i = stride * n is the offset to the nth sample. */
348  switch (c.K)
349  {
350  case 3:
351  for (i = stride_N - stride_4; i >= 0; i -= stride)
352  dest[i] = c.filter[0] * dest[i]
353  - c.filter[1] * dest[i + stride]
354  - c.filter[2] * dest[i + stride_2]
355  - c.filter[3] * dest[i + stride_3];
356  break;
357  case 4:
358  for (i = stride_N - stride_5; i >= 0; i -= stride)
359  dest[i] = c.filter[0] * dest[i]
360  - c.filter[1] * dest[i + stride]
361  - c.filter[2] * dest[i + stride_2]
362  - c.filter[3] * dest[i + stride_3]
363  - c.filter[4] * dest[i + stride_4];
364  break;
365  case 5:
366  for (i = stride_N - stride * 6; i >= 0; i -= stride)
367  dest[i] = c.filter[0] * dest[i]
368  - c.filter[1] * dest[i + stride]
369  - c.filter[2] * dest[i + stride_2]
370  - c.filter[3] * dest[i + stride_3]
371  - c.filter[4] * dest[i + stride_4]
372  - c.filter[5] * dest[i + stride_5];
373  break;
374  }
375 
376  return;
377 }
378 
397 void vyv_gaussian_conv_image(vyv_coeffs c, num *dest, const num *src,
398  int width, int height, int num_channels)
399 {
400  const long num_pixels = ((long)width) * ((long)height);
401  int x, y, channel;
402 
403  assert(dest && src && num_pixels > 0);
404 
405  /* Loop over the image channels. */
406  for (channel = 0; channel < num_channels; ++channel)
407  {
408  num *dest_y = dest;
409  const num *src_y = src;
410 
411  /* Filter each row of the channel. */
412  for (y = 0; y < height; ++y)
413  {
414  vyv_gaussian_conv(c, dest_y, src_y, width, 1);
415  dest_y += width;
416  src_y += width;
417  }
418 
419  /* Filter each column of the channel. */
420  for (x = 0; x < width; ++x)
421  vyv_gaussian_conv(c, dest + x, dest + x, height, width);
422 
423  dest += num_pixels;
424  src += num_pixels;
425  }
426 
427  return;
428 }