WAV是一種以RIFF爲基礎的無壓縮音頻編碼格式,該格式以Header、Format Chunk及Data Chunk三部分構成。
本文簡要解析了各部分的構成要素,概述了如何使用C++對文件頭進行解析以及提取音頻數據。
上圖展現了WAV文件格式,包括每一field的大小與端序編碼
定義結構體WaveHeader來保存WAV文件頭,即Header、Format Chunk及Data Chunk的非data部分,此外在該結構體中添加了num_frame字段,用來保存文件總幀數,因爲Header、Format Chunk與Data Chunk之間可能有其餘說明信息,因此還添加了start_pos字段用來保存真正的data開始的位置。
/*
 * In-memory copy of a WAV file's header fields: the RIFF header, the
 * Format chunk, and the non-sample part of the Data chunk, plus two
 * derived values (num_frame, start_pos) filled in by the parser.
 *
 * Every member is zero-initialized, so a default-constructed instance
 * represents "nothing parsed yet".
 *
 * Declared as a plain struct: the former `typedef struct WaveHeader {...};`
 * had no declarator, so the typedef keyword was ignored and only
 * produced a compiler warning.
 */
struct WaveHeader {
    /* ---- RIFF header ---- */
    char chunk_id[4] = { 0 };           /* "RIFF" marker (not NUL-terminated) */
    unsigned int chunk_size = 0;        /* size field following the "RIFF" marker */
    char format[4] = { 0 };             /* four bytes following chunk_size */

    /* ---- Format chunk ---- */
    char fmt_chunk_id[4] = { 0 };       /* "fmt " marker (not NUL-terminated) */
    unsigned int fmt_chunk_size = 0;    /* size field of the format chunk */
    unsigned short audio_fomat = 0;     /* audio format code (spelling kept: callers use this name) */
    unsigned short num_channels = 0;    /* number of channels */
    unsigned int sample_rate = 0;       /* sample rate field */
    unsigned int byte_rate = 0;         /* byte rate field */
    unsigned short block_align = 0;     /* block align field */
    unsigned short bits_per_sample = 0; /* bits per single-channel sample */

    /* ---- Data chunk (without the samples) ---- */
    char data_chunk_id[4] = { 0 };      /* "data" marker (not NUL-terminated) */
    unsigned int data_chunk_size = 0;   /* number of bytes of sample data */

    /* ---- Derived values ---- */
    int num_frame = 0;                  /* total frame count computed from the fields above */
    int start_pos = 0;                  /* byte offset in the file where sample data begins */
};
/* * fname: 文件路徑 * wh: 用來保存文件頭的結構體實例 */ void getHead(string fname, WaveHeader &wh) { /* *因爲事先並不知道文件大小,故定義足量大小的char數組覆蓋文件頭 *以後可根據提取到的ChunkSize來定義提取音頻數據用的數組 */ const int HEAD_LENGTH = 256 * 1024;//256kb char buf[HEAD_LENGTH]; FILE *stream; freopen_s(&stream, fname.c_str(), "rb", stderr); fread(buf, 1, HEAD_LENGTH, stream); //記錄文件讀取位置 int pos = 0; //尋找「RIFF」標記 while (pos < HEAD_LENGTH) { if (buf[pos] == 'R'&&buf[pos + 1] == 'I'&&buf[pos + 2] == 'F'&buf[pos + 3] == 'F') { wh.chunk_id[0] = 'R'; wh.chunk_id[1] = 'I'; wh.chunk_id[2] = 'F'; wh.chunk_id[3] = 'F'; pos += 4; break; } ++pos; } //讀取Header部分 wh.chunk_size = *(int *)&buf[pos]; pos += 4; wh.format[0] = buf[pos]; wh.format[1] = buf[pos + 1]; wh.format[2] = buf[pos + 2]; wh.format[3] = buf[pos + 3]; pos += 4; //尋找「fmt」標記 while (pos < HEAD_LENGTH) { if (buf[pos] == 'f'&&buf[pos + 1] == 'm'&&buf[pos + 2] == 't') { wh.fmt_chunk_id[0] = 'f'; wh.fmt_chunk_id[1] = 'm'; wh.fmt_chunk_id[2] = 't'; pos += 4; break; } ++pos; } //讀取Format Chunk部分 wh.fmt_chunk_size = *(int *)&buf[pos]; pos += 4; wh.audio_fomat = *(short *)&buf[pos]; pos += 2; wh.num_channels = *(short *)&buf[pos]; pos += 2; wh.sample_rate = *(int *)&buf[pos]; pos += 4; wh.byte_rate = *(int *)&buf[pos]; pos += 4; wh.block_align = *(short *)&buf[pos]; pos += 2; wh.bits_per_sample = *(short *)&buf[pos]; pos += 2; //尋找「data」標記 while (pos < HEAD_LENGTH) { if (buf[pos] == 'd'&&buf[pos + 1] == 'a'&&buf[pos + 2] == 't'&buf[pos + 3] == 'a') { wh.data_chunk_id[0] = 'd'; wh.data_chunk_id[1] = 'a'; wh.data_chunk_id[2] = 't'; wh.data_chunk_id[3] = 'a'; pos += 4; break; } ++pos; } //讀取Data Chunk的非data部分 wh.data_chunk_size = *(int *)&buf[pos]; pos += 4; //記錄真正音頻數據的開始位置 wh.start_pos = pos; //計算文件總幀數 wh.num_frame = wh.data_chunk_size / (wh.num_channels*(wh.bits_per_sample / 8)); }
/* * fname: 文件路徑 * wh: 對應的文件頭結構體實例 */ void getData(string fname, WaveHeader &wh){ //記錄文件讀取位置 int pos = wh.start_pos; //爲加快處理速度,根據ChunkSize將文件一次讀入內存 FILE *stream; freopen_s(&stream, fname.c_str(), "rb", stderr); char* file_data = new char[wh.chunk_size + 8]; fread(file_data, 1, wh.chunk_size + 8, stream); //以每幀2字節爲例 short left_data; short right_data; while(pos < wh.start_pos + wh.data_chunk_size){ left_data = *(short*)&file_data[pos]; //TODO: 處理左聲道數據 pos += 2; right_data = *(short*)&file_data[pos]; //TODO: 處理右聲道數據 pos += 2; } }