[PATCH] dsound: added a windowed-sinc resampler

Sun May 6 13:06:13 CDT 2012

This patch should finally close bug #14717.

Some facts and context, to avoid misunderstandings:

 * Wine already has a resampler that is used when the sample rates of
primary and secondary dsound buffers don't match.

 * The existing resampler is a zero-order hold - i.e. the worst
resampler in all existing software. It corrupts music far worse than
MP3.

 * This patch replaces the existing resampler with a more scientific
one, based on the windowed sinc function. A simple Gaussian window is
used.

Technical details:

 * The build process builds the fir.h file by running the tools/make_fir
script. I will not accept suggestions to include only fir.h without the
make_fir script, because from my viewpoint that would be a violation of
LGPL.

 * The quality is between Speex Q3 and Q4, because this is what Windows
XP does by default. Quality can be changed at compile time by editing
tools/make_fir. I won't object if this patch gets committed with
different (but reasonable) quality settings.

 * The CPU usage is similar to Speex Q4 (i.e. ~5-6% per stream on a Core
2 Duo E6420 @ 2.13 GHz). It can be reduced to 2-3% by compiling mixer.c
and dsound_convert.c with -O3 -ffast-math -msse2 (or -msse3) - gcc
autovectorizer does wonders! I won't object to a patch written by
somebody else that adds those flags with proper checking that the
compiler supports them.

Alternative approaches considered but abandoned:

 * The existing Speex resampler cannot be imported because of the
push/pull model mismatch. Wine needs to pull exactly N samples out of
the resampler, while Speex only allows pushing some number of samples
into it (without any guarantee that it accepts all of them on the first
try).

 * Using the system resampler & mixer is a nice idea in theory, as it
would prevent unneeded or even double resampling (once in Wine and once
in PulseAudio). WaveOut already implements this. However, this approach
cannot work for secondary buffers with DSBCAPS_CTRLFREQUENCY, and would
create problems like "1000 sliders (one slider per stream) instead of
one slider per app" with current per-stream volume control GUIs such as
pavucontrol.

-- 
Alexander E. Patrakov
-------------- next part --------------
>From 0a3a9e0aab6264415c643476adc2e27ffc9961dc Mon Sep 17 00:00:00 2001
From: "Alexander E. Patrakov" <patrakov at gmail.com>
Date: Sun, 22 Apr 2012 20:19:38 +0600
Subject: [PATCH] dsound: added a windowed-sinc resampler

---
 Make.vars.in                 |    1 +
 dlls/dsound/Makefile.in      |    8 ++
 dlls/dsound/dsound_private.h |    3 +-
 dlls/dsound/mixer.c          |  122 +++++++++++++++++++++++++++-----
 tools/make_fir               |  159 ++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 275 insertions(+), 18 deletions(-)
 create mode 100755 tools/make_fir

diff --git a/Make.vars.in b/Make.vars.in
index 100594b..1eed53c 100644
--- a/Make.vars.in
+++ b/Make.vars.in
@@ -61,6 +61,7 @@ MKINSTALLDIRS   = $(top_srcdir)/tools/mkinstalldirs -m 755
 WINAPI_CHECK    = $(top_srcdir)/tools/winapi/winapi_check
 BUILDIMAGE      = $(top_srcdir)/tools/buildimage
 C2MAN           = $(top_srcdir)/tools/c2man.pl
+MAKEFIR         = $(top_srcdir)/tools/make_fir
 RUNTEST         = $(top_srcdir)/tools/runtest
 MAKECTESTS      = $(TOOLSDIR)/tools/make_ctests$(TOOLSEXT)
 MAKEXFTMPL      = $(TOOLSDIR)/tools/make_xftmpl$(TOOLSEXT)
diff --git a/dlls/dsound/Makefile.in b/dlls/dsound/Makefile.in
index 8258637..94940ce 100644
--- a/dlls/dsound/Makefile.in
+++ b/dlls/dsound/Makefile.in
@@ -19,3 +19,11 @@ IDL_R_SRCS = dsound_classes.idl
 RC_SRCS = version.rc
 
 @MAKE_DLL_RULES@
+
+depend: fir.h
+
+fir.h: $(MAKEFIR)
+	$(MAKEFIR) fir.h
+
+clean::
+	$(RM) fir.h
diff --git a/dlls/dsound/dsound_private.h b/dlls/dsound/dsound_private.h
index 14296b0..8c6f60b 100644
--- a/dlls/dsound/dsound_private.h
+++ b/dlls/dsound/dsound_private.h
@@ -183,7 +183,8 @@ struct IDirectSoundBufferImpl
     DSBUFFERDESC                dsbd;
     /* used for frequency conversion (PerfectPitch) */
     ULONG                       freqneeded;
-    float freqAcc, freqAdjust;
+    DWORD                       firstep;
+    float freqAcc, freqAdjust, firgain;
     /* used for mixing */
     DWORD                       primary_mixpos, sec_mixpos;
 
diff --git a/dlls/dsound/mixer.c b/dlls/dsound/mixer.c
index 7f98e42..ec5a264 100644
--- a/dlls/dsound/mixer.c
+++ b/dlls/dsound/mixer.c
@@ -40,6 +40,7 @@
 #include "ks.h"
 #include "ksmedia.h"
 #include "dsound_private.h"
+#include "fir.h"
 
 WINE_DEFAULT_DEBUG_CHANNEL(dsound);
 
@@ -129,6 +130,24 @@ void DSOUND_RecalcFormat(IDirectSoundBufferImpl *dsb)
 	    && (IsEqualGUID(&pwfxe->SubFormat, &KSDATAFORMAT_SUBTYPE_IEEE_FLOAT))))
 		ieee = TRUE;
 
+	/**
+	 * Recalculate FIR step and gain.
+	 *
+	 * firstep says how many points of the FIR exist per one
+	 * sample in the secondary buffer. firgain specifies what
+	 * to multiply the FIR output by in order to attenuate it correctly.
+	 */
+	if (dsb->freqAdjust > 1.0f) {
+		/**
+		 * Yes, round it a bit to make sure that the
+		 * linear interpolation factor never changes.
+		 */
+		dsb->firstep = ceil(fir_step / dsb->freqAdjust);
+	} else {
+		dsb->firstep = fir_step;
+	}
+	dsb->firgain = (float)dsb->firstep / fir_step;
+
 	/* calculate the 10ms write lead */
 	dsb->writelead = (dsb->freq / 100) * dsb->pwfx->nBlockAlign;
 
@@ -228,28 +247,97 @@ static inline float get_current_sample(const IDirectSoundBufferImpl *dsb,
     return dsb->get(dsb, mixpos % dsb->buflen, channel);
 }
 
-/**
- * Copy frames from the given input buffer to the given output buffer.
- * Translate 8 <-> 16 bits and mono <-> stereo
- */
-static inline void cp_fields(IDirectSoundBufferImpl *dsb,
+/* Copy count frames one-to-one (no rate conversion); returns frames consumed. */
+static UINT cp_fields_noresample(IDirectSoundBufferImpl *dsb, UINT ostride, UINT count)
+{
+    const UINT in_stride = dsb->pwfx->nBlockAlign;
+    DWORD frame, ch, in_off = 0, out_off = 0;
+
+    for (frame = 0; frame < count; frame++, in_off += in_stride, out_off += ostride)
+        for (ch = 0; ch < dsb->mix_channels; ch++)
+            dsb->put(dsb, out_off, ch, get_current_sample(dsb, dsb->sec_mixpos + in_off, ch));
+    return count;
+}
+/* Windowed-sinc resampling path: filters count output frames; returns input frames to advance. */
+static UINT cp_fields_resample(IDirectSoundBufferImpl *dsb,
         UINT ostride, UINT count, float *freqAcc)
 {
-    DWORD ipos = dsb->sec_mixpos;
-    UINT istride = dsb->pwfx->nBlockAlign, i;
-    DWORD opos = 0;
+    UINT i, channel;
+    UINT istride = dsb->pwfx->nBlockAlign;
+
+    float freqAdjust = dsb->freqAdjust;
+    float freqAcc_start = *freqAcc;
+    float freqAcc_end = freqAcc_start + count * freqAdjust;
+    UINT dsbfirstep = dsb->firstep;
+    UINT channels = dsb->mix_channels;
+    UINT max_ipos = freqAcc_start + count * freqAdjust;
+    /* fir_cachesize bounds the taps used per output frame; required_input adds that tail to max_ipos. */
+    UINT fir_cachesize = (fir_len + dsbfirstep - 2) / dsbfirstep;
+    UINT required_input = max_ipos + fir_cachesize;
+    /* NOTE(review): neither HeapAlloc result below is checked for NULL before it is used. */
+    float* intermediate = HeapAlloc(GetProcessHeap(), 0,
+            sizeof(float) * required_input * channels);
+
+    float* fir_copy = HeapAlloc(GetProcessHeap(), 0,
+            sizeof(float) * fir_cachesize);
+
+    /* Important: this buffer MUST be non-interleaved (all frames of
+     * channel 0, then all frames of channel 1, ...) if you want
+     * -msse3 to have any effect. This is good for CPU cache effects, too.
+     */
+    float* itmp = intermediate;
+    for (channel = 0; channel < channels; channel++)
+        for (i = 0; i < required_input; i++)
+            *(itmp++) = get_current_sample(dsb,
+                    dsb->sec_mixpos + i * istride, channel);
+    /* One output frame per iteration; positions are tracked in units of FIR points. */
+    for(i = 0; i < count; ++i) {
+        float total_fir_steps = (freqAcc_start + i * freqAdjust) * dsbfirstep;
+        UINT int_fir_steps = total_fir_steps;
+        UINT ipos = int_fir_steps / dsbfirstep;
+        /* idx: first fir[] entry for this frame; rem: weight given to fir[idx + 1] when interpolating. */
+        UINT idx = (ipos + 1) * dsbfirstep - int_fir_steps - 1;
+        float rem = int_fir_steps + 1.0 - total_fir_steps;
+        /* Interpolate the taps once per frame; every channel reuses fir_copy below. */
+        int fir_used = 0;
+        while (idx < fir_len - 1) {
+            fir_copy[fir_used++] = fir[idx] * (1.0 - rem) + fir[idx + 1] * rem;
+            idx += dsb->firstep;
+        }
 
-    for (i = 0; i < count; ++i){
-        DWORD channel;
-        for (channel = 0; channel < dsb->mix_channels; channel++)
-            dsb->put(dsb, opos, channel,
-                get_current_sample(dsb, ipos, channel));
-        *freqAcc += dsb->freqAdjust;
-        ipos += ((DWORD)*freqAcc) * istride;
-        *freqAcc -= truncf(*freqAcc);
-        opos += ostride;
+        assert(fir_used <= fir_cachesize);
+        assert(ipos + fir_used <= required_input);
+
+        for (channel = 0; channel < dsb->mix_channels; channel++) {
+            int j;
+            float sum = 0.0;
+            float* cache = &intermediate[channel * required_input + ipos];
+            for (j = 0; j < fir_used; j++)
+                sum += fir_copy[j] * cache[j];
+            dsb->put(dsb, i * ostride, channel, sum * dsb->firgain);
+        }
     }
 
+    freqAcc_end -= (int)freqAcc_end; /* keep only the fractional part of the end position */
+    *freqAcc = freqAcc_end;
+
+    HeapFree(GetProcessHeap(), 0, fir_copy);
+    HeapFree(GetProcessHeap(), 0, intermediate);
+
+    return max_ipos;
+}
+
+static void cp_fields(IDirectSoundBufferImpl *dsb,
+        UINT ostride, UINT count, float *freqAcc)
+{
+    DWORD ipos, adv;
+
+    if (dsb->freqAdjust == 1.0)
+        adv = cp_fields_noresample(dsb, ostride, count); /* *freqAcc is unmodified */
+    else
+        adv = cp_fields_resample(dsb, ostride, count, freqAcc);
+
+    ipos = dsb->sec_mixpos + adv * dsb->pwfx->nBlockAlign;
     if (ipos >= dsb->buflen) {
         if (dsb->playflags & DSBPLAY_LOOPING)
             ipos %= dsb->buflen;
diff --git a/tools/make_fir b/tools/make_fir
new file mode 100755
index 0000000..dc901a8
--- /dev/null
+++ b/tools/make_fir
@@ -0,0 +1,159 @@
+#! /usr/bin/perl -w
+#              DirectSound
+#
+# Copyright 2011-2012 Alexander E. Patrakov
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+
+use strict;
+use Math::Trig;
+
+# This program generates an array of Finite Impulse Response (FIR) filter
+# values for use in resampling audio.
+#
+# Values are based on the resampler from Windows XP at the default (best)
+# quality, reverse engineered by saving kvm output to a wav file.
+
+# Controls how sharp the transition between passband and stopband is.
+# The transition bandwidth is approximately (1 / exp_width) of the
+# Nyquist frequency.
+
+my $exp_width = 41.0;
+
+# Controls the stopband attenuation. It is related but not proportional
+# to exp(-(PI * lobes_per_wing / exp_width) ^2) / lobes_per_wing
+
+my $lobes_per_wing = 28;
+
+# Controls the position of the transition band and thus attenuation at the
+# Nyquist frequency and above. Amended below so that the length of the FIR is
+# an integer. Essentially, this controls the trade-off between good rejection
+# of unrepresentable frequencies (those above half of the lower of the sample
+# rates) and not rejecting the wanted ones. Windows XP errs on the side of
+# letting artifacts through, which somewhat makes sense if they are above
+# 20 kHz anyway, or in the case of upsampling, where we can assume that the
+# problematic frequencies are not in the input. This, however, doesn't match
+# what linux resamplers do - so set this to 0.85 to match them. 0.98 would
+# give Windows XP behaviour.
+
+my $approx_bandwidth = 0.85;
+
+# The amended value will be stored here
+my $bandwidth;
+
+# The number of points per time unit equal to one period of the original
+# Nyquist frequency. The more points there are, the smaller the interpolation error.
+my $fir_step = 120;
+
+
+# Gaussian-windowed sinc at offset x from the filter centre (callers pass FIR point offsets)
+sub fir_val($)
+{
+    my $phase = $_[0] * (pi * $bandwidth);
+    my $scaled = $phase / $exp_width;
+    my $window = exp(-($scaled * $scaled));
+    # sin(x)/x is replaced by its limit, 1, when the phase is zero
+    return 1.0 * $window if $phase == 0;
+    return sin($phase) / $phase * $window;
+}
+
+# Blend of two values: returns lo at weight 0, hi at weight 1, linear in between
+sub mlinear($$$)
+{
+    my ($lo, $hi, $weight) = @_;
+    return (1.0 - $weight) * $lo + $weight * $hi;
+}
+
+# Convert an amplitude ratio to decibels: 20 * log10(|ratio|)
+sub to_db($) {
+    my $magnitude = abs($_[0]);
+    return 20.0 * log($magnitude) / log(10.0);
+}
+
+my $wing_len = int($lobes_per_wing / $approx_bandwidth * $fir_step + 1);
+$bandwidth = 1.0 * $lobes_per_wing / $wing_len;
+
+my $amended_bandwidth = $bandwidth * $fir_step;
+my $fir_len = 2 * $wing_len + 1;
+my @fir;
+
+# Constructing the FIR is easy
+for (my $i = 0; $i < $fir_len; $i++) {
+    push @fir, fir_val($i - $wing_len);
+}
+
+# Now we have to test it and print some statistics to stderr.
+# Test 0: FIR size
+print STDERR "size: $fir_len\n";
+
+# Test 1: Interpolation noise. It should be less than -90 dB.
+
+# If you suspect that 0.5 is special due to some symmetry and thus yields
+# an abnormally low noise figure, change it. But really, it isn't special.
+my $testpoint = 0.5;
+
+my $exact_val = fir_val($testpoint);
+my $lin_approx_val = mlinear($fir[$wing_len], $fir[$wing_len + 1],
+        $testpoint);
+
+my $lin_error_db = to_db($exact_val - $lin_approx_val);
+
+printf STDERR "interpolation noise: %1.2f dB\n", $lin_error_db;
+
+# Test 2: Passband and stopband.
+# The filter gain, ideally, should be 0.00 dB below the Nyquist
+# frequency and -inf dB above it. But it is impossible. So
+# let's settle for -80 dB above 1.08 * f_Nyquist.
+
+my $sum = 0.0;
+$sum += $_ for @fir;
+
+# Frequencies in this list are expressed as fractions
+# of the Nyquist frequency.
+my @testfreqs = (0.5, 0.8, 1.0, 1.08, 1.18, 1.33, 1.38);
+foreach my $testfreq(@testfreqs) {
+    my $dct_coeff = 0.0;
+    for (my $i = 0; $i < $fir_len; $i++) {
+        my $x = 1.0 * ($i - $wing_len) / $fir_step;
+        $dct_coeff += $fir[$i] * cos($x * $testfreq * pi);
+    }
+    printf STDERR "DCT: %1.2f -> %1.2f dB\n",
+        $testfreq, to_db($dct_coeff / $sum);
+}
+
+# Now actually print the FIR to a C header file
+
+if ($#ARGV >= 0) {
+    # Write the header to the file named on the command line; abort if it cannot be created.
+    open(FILE, '>', $ARGV[0]) or die "make_fir: cannot write $ARGV[0]: $!\n";
+    select FILE;
+}
+
+print "/* generated by tools/make_fir */\n";
+print "static const int fir_len = $fir_len;\n";
+print "static const int fir_step = $fir_step;\n";
+print "static const float fir[] = {\n";
+
+for (my $i = 0; $i < $fir_len; $i++) {
+    printf "%10.10f", $amended_bandwidth * $fir[$i];
+    if ($i == $fir_len - 1) {
+        print "\n";
+    } elsif (($i + 1) % 5 == 0) {
+        print ",\n";
+    } else {
+        print ", ";
+    }
+}
+print "};\n";
-- 
1.7.8.6