Windows 如何採集回放設備聲音並重採樣

時間 2020-06-28

原文原文鏈接

在 Windows 平臺下，採集輸入設備音頻數據的資料已經不少了，可是採集聲卡回放設備的方法卻比較少。在此介紹本人開發的一個功能模塊，用於採集聲卡回放輸出設備（桌面聲音）的音頻數據並作重採樣處理；當然，它同時也支持從輸入設備中採集音頻數據。

在實現過程中使用了 MMDevice API 等較新的接口，該接口從 Windows Vista 開始才出現，因此這裏提供的代碼只支持 Windows Vista 及之後的版本。

因爲在 Windows 下不同的聲卡可以輸出不同的音頻格式（比如採樣率、位深、聲道數），所以從聲卡設備中獲取到的 PCM 數據格式與每臺 PC 的聲卡設置保持一致。因此必須在採集到 PCM 數據後統一做一次重採樣處理；後面提供的代碼最終輸出雙聲道、s16、44100 Hz 格式的 PCM 數據，以便後續處理。其中數據的重採樣藉助的是 libsamplerate 開源代碼庫，它可以使用 VS 工具直接編譯。大家如果需要直接使用以下提供的代碼，需要自己去下載並靜態編譯 libsamplerate 庫。

下面只貼出頭文件，其它代碼請到本人的已上傳資料中下載：

#pragma once

#include <list>
#include <string>
#include <stdint.h>
#include <mmdeviceapi.h>
#include <Audioclient.h>
#include <propsys.h>
#include <Functiondiscoverykeys_devpkey.h>
#include "../libsamplerate/samplerate.h"

#pragma comment(lib, "libsamplerate.lib")

using namespace std;

// Generic signed/unsigned integer parameter types.
// They are `long long` based when the C64 macro is defined and `long` based
// otherwise.
// NOTE(review): presumably C64 is defined by the build for 64-bit targets
// (on Windows `long` stays 32-bit) — confirm against the project settings.
#if !defined(C64)
typedef long                PARAM;
typedef unsigned long       UPARAM;
#else
typedef long long           PARAM;
typedef unsigned long long  UPARAM;
#endif


// Short aliases for string pointers, in mutable / const pairs.
// Narrow (char) strings.
typedef char                *LPSTR;
typedef const char          *LPCSTR;
// Wide (wchar_t) strings.
typedef wchar_t             *WSTR;
typedef const wchar_t       *CWSTR;
// TCHAR strings. TCHAR is not declared in this file.
// NOTE(review): presumably it comes from the Windows headers included above
// and switches between char and wchar_t with the UNICODE setting — confirm.
typedef TCHAR               *TSTR;
typedef const TCHAR         *CTSTR;

// Default sample rate constant.
// NOTE(review): presumably the output rate in Hz used by the resampler — confirm.
#define DEFAULT_SAMPLE_RATE 44100

// Additional speaker-layout masks composed from KSAUDIO_SPEAKER_* / SPEAKER_*
// constants that are declared outside this file.
// NOTE(review): some Windows SDK versions may already define macros with these
// names; consider #ifndef guards after confirming the SDK values match.
#define KSAUDIO_SPEAKER_4POINT1     (KSAUDIO_SPEAKER_QUAD|SPEAKER_LOW_FREQUENCY)
#define KSAUDIO_SPEAKER_3POINT1     (KSAUDIO_SPEAKER_STEREO|SPEAKER_FRONT_CENTER|SPEAKER_LOW_FREQUENCY)
#define KSAUDIO_SPEAKER_2POINT1     (KSAUDIO_SPEAKER_STEREO|SPEAKER_LOW_FREQUENCY)

// Calls Release() on `var` when it is non-null, then resets it to NULL.
// `var` must be a modifiable pointer lvalue; it is evaluated more than once,
// so do not pass an expression with side effects.
// The do { } while (0) wrapper makes the expansion a single statement, so the
// macro is safe inside an unbraced if/else and cannot capture a following
// `else`. Call sites must end the invocation with a semicolon.
#define SafeRelease(var) do { if (var) { (var)->Release(); (var) = NULL; } } while (0)

// Bit flags naming the four edges. Each enumerator is a distinct single bit,
// so several edges can be combined with bitwise OR.
enum edges
{
	edgeLeft   = 1 << 0,	// 0x01
	edgeRight  = 1 << 1,	// 0x02
	edgeTop    = 1 << 2,	// 0x04
	edgeBottom = 1 << 3	// 0x08
};

// Describes one audio device as a pair of strings.
//
// No destructor is declared on purpose: both members are std::string and
// clean up after themselves. The previous destructor only called empty(),
// which is a query and releases nothing, and declaring it suppressed the
// implicit move operations.
struct AudioDeviceInfo
{
	std::string strID;		// device identifier (presumably the endpoint ID string — confirm)
	std::string strName;	// device name (presumably the human-readable name — confirm)
};

// Overlays a LONG with an anonymous struct made of a WORD followed by two
// BYTEs, so the same storage can be accessed either as one value or as parts.
// NOTE(review): presumably used to assemble packed 24-bit samples
// (wVal + tripleVal) into a LONG — confirm against the implementation.
// Anonymous structs inside a union are a compiler extension in C++, and the
// mapping between the parts and `val` depends on the target's byte order.
union TripleToLong
{
	// The whole value.
	LONG val;
	struct 
	{
		// First member of the overlay.
		WORD wVal;
		// Second member of the overlay.
		BYTE tripleVal;
		// Third member of the overlay.
		BYTE lastByte;
	};
};

// Bundles a libsamplerate converter state with a 64-bit counter/range value.
// NOTE(review): who creates and deletes `resampler` is not visible in this
// header — confirm ownership in the implementation.
struct NotAResampler
{
	// libsamplerate converter handle (SRC_STATE is declared in samplerate.h).
	SRC_STATE	 *resampler;
	// NOTE(review): meaning not visible here — confirm against the implementation.
	uint64_t     jumpRange;
};

// Selects which kind of audio device an operation refers to.
enum AudioDeviceType
{
	ADT_PLAYBACK  = 0,	// playback device
	ADT_RECORDING = 1	// recording device
};

class CDesktopAudioDevice
{
public:
	CDesktopAudioDevice(void);
	~CDesktopAudioDevice(void);

	bool		Init(bool isPlayBack, const string devGUID = "Default");

	void		StartCapture();
	void		StopCapture();

	int			QueryAudioBuffer(string &outData);

	int			GetAudioDevices(list<AudioDeviceInfo *> &deviceList, AudioDeviceType deviceType, bool bConnectedOnly);
	bool		GetDefaultDevice(string &strVal, AudioDeviceType deviceType);

	bool		GetDefaultMicID(string &strVal);
	bool		GetDefaultSpeakerID(string &strVal);

protected:
	wchar_t*	MByteToWChar(uint32_t CodePage,LPCSTR lpcszSrcStr);
	char*		WCharToMByte(uint32_t CodePage,LPCWSTR lpcwszSrcStr);

	bool		GetNextBuffer(string &buffer, uint32_t &numFrames);

	void		FreeData();
	
protected:
	list<AudioDeviceInfo *> m_DeviceList;
	
	string					m_CurDeviceID;
	string					m_CurDeviceName;

	IMMDeviceEnumerator		*mmEnumerator_;
	IMMDevice				*mmDevice_;
	IAudioClient			*mmClient_;
	IAudioCaptureClient		*mmCapture_;

	uint32_t				m_SampleWindowSize;
	DWORD					m_InputChannelMask;
	WORD					m_InputChannels;
	uint32_t				m_InputSamplesPerSec;
	uint32_t				m_InputBufferSize;

	uint32_t				m_InputBitsPerSample;
	bool					m_bFloat;

	NotAResampler			*m_pResampler;
	double					m_ResampleRatio;

	uint8_t					*m_pBlankBuffer, *m_pTempResampleBuffer;
	uint8_t					*m_pDataOutputBuffer;
	uint8_t					*m_pTmpBuffer;
};