學習Android多媒體的步驟:
1,Audio PCM &video YUV各類數據的處理,格式的封裝與轉換原理
2,多媒體的播放框架,nuplayer ,stagefright
3,音視頻分離 MediaExtractor
4,音頻編解碼(以AAC爲例)
5,視頻圖像編解碼(以H264爲例)
6,音視頻同步技術android
這一部分的學習以前,須要瞭解:
1,音視頻容器的概念,參考博文:
http://blog.csdn.net/leixiaohua1020/article/details/17934487
2,不一樣的視頻封裝格式標準(這裏以MP4文件分析),參考博文:
http://blog.csdn.net/chenchong_219/article/details/44263691
3,openmax IL框架
https://www.khronos.org/openmaxil
4,查看視頻文件工具:
ultraedit 一個文本編輯器
Elecard Video Format Analyzer視頻格式分析器,能夠看到視頻每一個box的各個元素的說明,偏移值,大小等信息。經過某些具體的box能夠查詢到視頻的格式信息。
=============如下是正文部分====================
序列圖說明(如下標號表明序列圖中的交互序列編號):
交互1,nuplayer::setDataSourceAsync
從MediaPlayer setDataSource開始,實質是調用
setDataSourceAsync(int fd, int64_t offset, int64_t length),不一樣的播放方式,參數不同。
主要工做是:異步
交互2~4 :建立一個GenericSource,同時將獲取的參數經過GenericSource::setDataSource傳遞
交互5: 發送消息kWhatSetDataSource給 nuplayer(AHandler)處理事件。主要是
將得到的nuplayer::Source(GenericSource)賦值給snuplayer::mSource
發送消息給NuPlayerDriver,告訴上層setDataSource完成,提示上層能夠開始下一步指令。見交互6:driver->notifySetDataSourceCompleted
交互8:Nuplayer::prepareAsync
上層獲得setDataSource完成的消息以後,調用這個函數開始下一步的指令,主要工做是:
交互9:發送消息kWhatPrepare給Nuplayer(AHandler)
交互10:nuplayer收到消息後,操作mSource(也是一個AHandler),在這個邏輯中間,實質是調用NuPlayer::GenericSource::prepareAsync(),主要工做是:
給Source建立一個ALooper,用來循環接收處理AMessage
發送消息kWhatPrepareAsync給Source(AHandler)開始異步準備
交互13~17:這裏纔是重點
交互13:GenericSource::initFromDataSource
後面還將具體分析這個函數的其餘重要工做
1,根據sniff建立指定的mediaExtractor,建立同時讀取數據,建立metaData,解析「track」而且分離
2,根據track,初始化mVideoTrack和mAudioTrack,加入 mSources
3,從metaData獲取
kKeyDuration
kKeyBitRate
交互16:sp MediaExtractor::CreateFromService
主要工做是遍歷全部註冊的Extractor,分別去讀取文件頭,根據條件判斷具體選用哪一個Extractor,以及初始化minetype,具體看下面:
交互17:DataSource::RegisterDefaultSniffers()
// The sniffer can optionally fill in "meta" with an AMessage containing // a dictionary of values that helps the corresponding extractor initialize // its state without duplicating effort already exerted by the sniffer. typedef bool (*SnifferFunc)( const sp<DataSource> &source, String8 *mimeType, float *confidence, sp<AMessage> *meta); // static void DataSource::RegisterSniffer_l(SnifferFunc func) { for (List<SnifferFunc>::iterator it = gSniffers.begin(); it != gSniffers.end(); ++it) { if (*it == func) { return; } } gSniffers.push_back(func); } // static void DataSource::RegisterDefaultSniffers() { Mutex::Autolock autoLock(gSnifferMutex); if (gSniffersRegistered) { return; } /*實質就是將左右的extractor註冊而且保存在DataSource::gSniffers(Vector)中間 可見,若是須要自定義一個IMediaExtrector的派生類,則必須實現這個方法,這個方法具體什麼做用,看下面分析 */ RegisterSniffer_l(SniffMPEG4); RegisterSniffer_l(SniffMatroska); RegisterSniffer_l(SniffOgg); RegisterSniffer_l(SniffWAV); RegisterSniffer_l(SniffFLAC); RegisterSniffer_l(SniffAMR); RegisterSniffer_l(SniffMPEG2TS); RegisterSniffer_l(SniffMP3); RegisterSniffer_l(SniffAAC); RegisterSniffer_l(SniffMPEG2PS);+ if (getuid() == AID_MEDIA) { // WVM only in the media server process RegisterSniffer_l(SniffWVM); } RegisterSniffer_l(SniffMidi); //RegisterSniffer_l(AVUtils::get()->getExtendedSniffer()); char value[PROPERTY_VALUE_MAX]; if (property_get("drm.service.enabled", value, NULL) && (!strcmp(value, "1") || !strcasecmp(value, "true"))) { RegisterSniffer_l(SniffDRM); } gSniffersRegistered = true; }
// Runs every registered sniffer (see RegisterDefaultSniffers) against this
// data source and keeps the result of whichever sniffer reports the
// highest confidence.
// On success *mimeType / *confidence / *meta describe the best match.
// Returns true iff at least one sniffer recognized the content
// (i.e. final confidence > 0).
bool DataSource::sniff(
        String8 *mimeType, float *confidence, sp<AMessage> *meta) {
    *mimeType = "";
    *confidence = 0.0f;
    meta->clear();

    {
        Mutex::Autolock autoLock(gSnifferMutex);
        if (!gSniffersRegistered) {
            // Nothing registered yet — nothing can be recognized.
            return false;
        }
    }

    // Walk DataSource::gSniffers. Each sniffXXX() that returns true
    // proposes a mime type with a confidence value; the candidate with
    // the highest confidence wins.
    // (Removed an unused local counter that was incremented but never read.)
    for (List<SnifferFunc>::iterator it = gSniffers.begin();
         it != gSniffers.end(); ++it) {
        String8 newMimeType;
        float newConfidence;
        sp<AMessage> newMeta;
        if ((*it)(this, &newMimeType, &newConfidence, &newMeta)) {
            if (newConfidence > *confidence) {
                *mimeType = newMimeType;
                *confidence = newConfidence;
                *meta = newMeta;
            }
        }
    }

    return *confidence > 0.0;
}
這個sniffXXX函數函數到底在作什麼?咱們以SniffMPEG4爲例,函數原型:
// Attempt to actually parse the 'ftyp' atom and determine if a suitable // compatible brand is present. // Also try to identify where this file's metadata ends // (end of the 'moov' atom) and report it to the caller as part of // the metadata. static bool BetterSniffMPEG4( const sp<DataSource> &source, String8 *mimeType, float *confidence, sp<AMessage> *meta) { // We scan up to 128 bytes to identify this file as an MP4. static const off64_t kMaxScanOffset = 128ll; off64_t offset = 0ll; bool foundGoodFileType = false; off64_t moovAtomEndOffset = -1ll; bool done = false; ALOGE("%s:begin>>>>>>>>>>>>",__FUNCTION__); while (!done && offset < kMaxScanOffset) { uint32_t hdr[2]; if (source->readAt(offset, hdr, 8) < 8) { return false; } //size爲1 說明這個是large size 只有"mdat" box 纔會有large size域 //size爲0 說明這個最後一個box uint64_t chunkSize = ntohl(hdr[0]);//大端轉小端,網絡字序轉主機字序 uint32_t chunkType = ntohl(hdr[1]); //box type off64_t chunkDataOffset = offset + 8; //box data 域的起始地址 if (chunkSize == 1) { //size爲1 說明這個是largesize if (source->readAt(offset + 8, &chunkSize, 8) < 8) { return false; } chunkSize = ntoh64(chunkSize); chunkDataOffset += 8; //只有"mdat" box 纔會有large size域 if (chunkSize < 16) { // The smallest valid chunk is 16 bytes long in this case. return false; } } else if (chunkSize < 8) { // The smallest valid chunk is 8 bytes long. 
return false; } // (data_offset - offset) is either 8 or 16 off64_t chunkDataSize = chunkSize - (chunkDataOffset - offset);//box data域的大小 if (chunkDataSize < 0) { ALOGE("b/23540914"); return ERROR_MALFORMED; } char chunkstring[5]; MakeFourCCString(chunkType, chunkstring); ALOGV("saw chunk type %s, size %" PRIu64 " @ %lld", chunkstring, chunkSize, (long long)offset); switch (chunkType) { case FOURCC('f', 't', 'y', 'p'): { if (chunkDataSize < 8) { //說明一個compatible_brand元素都沒有,每一個元素是4個字節 return false; } uint32_t numCompatibleBrands = (chunkDataSize - 8) / 4;//計算幾個brands,0開始計數 for (size_t i = 0; i < numCompatibleBrands + 2; ++i) { if (i == 1) { // Skip this index, it refers to the minorVersion, // not a brand. continue; } uint32_t brand; if (source->readAt( chunkDataOffset + 4 * i, &brand, 4) < 4) { return false; } brand = ntohl(brand); if (isCompatibleBrand(brand)) { foundGoodFileType = true; break; } } if (!foundGoodFileType) { return false; } break; } case FOURCC('m', 'o', 'o', 'v'): { moovAtomEndOffset = offset + chunkSize; done = true; break; } default: break; } offset += chunkSize; } //ALOGE("%s:END<<<<<<<<<"); // if (!foundGoodFileType) { return false; } *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4; *confidence = 0.4f; if (moovAtomEndOffset >= 0) { *meta = new AMessage; (*meta)->setInt64("meta-data-size", moovAtomEndOffset); ALOGV("found metadata size: %lld", (long long)moovAtomEndOffset); } ALOGE("%s:END *mimeType(%s),*confidence(%.2f)<<<<<<<<<",__FUNCTION__,mimeType->string(),*confidence); return true; }
查看其餘Extractor文件的sniff方法,基本上就是檢查文件各個box信息,是否知足本Extractor的標準。若是是,就返回true,而且給相應的參數賦值,用於判斷選擇一個最佳的MediaExtractor對應的minetype,用於CreateFromService判斷到底初始化哪個MediaExtractor,最後初始化的是MPEG4Extractor。
void registerMediaExtractor(
const sp<IMediaExtractor> &extractor,
const sp<DataSource> &source,
const char *mime) {
ExtractorInstance ex; ex.mime = mime == NULL ? "NULL" : mime; ex.name = extractor->name(); ex.sourceDescription = source->toString(); ex.owner = IPCThreadState::self()->getCallingPid(); ex.extractor = extractor; { Mutex::Autolock lock(sExtractorsLock); if (sExtractors.size() > 10) { sExtractors.resize(10); } sExtractors.push_front(ex);//將建立的MediaExtractor放入static Vector<ExtractorInstance> sExtractors; ALOGE("ex.mime(%s),ex.sourceDescription(%s)",(ex.mime).string(),(ex.sourceDescription).string());//這個打印很重要,ex.sourceDescription能夠看到source很是重要的調試信息 } }
上面的流程圖說明,大概就是說明了MediaExtractor的建立過程,那分離音視頻是怎麼發生的呢?
1,根據sniff建立指定的mediaExtractor,建立同時讀取數據,建立metaData,解析「track」而且分離
2,根據track,初始化mVideoTrack和mAudioTrack,加入 mSources
建立extractor的過程,上面已經分析了。那分離是如何作到的呢?
// Creates a MediaExtractor for mDataSource, then walks all of its tracks to
// initialize mAudioTrack / mVideoTrack, populate mSources, and accumulate the
// overall duration (mDurationUs) and total bitrate (mBitrate).
// Returns OK on success, UNKNOWN_ERROR if no extractor could be created or
// no track was found.
status_t NuPlayer::GenericSource::initFromDataSource() {
    sp<IMediaExtractor> extractor;
    String8 mimeType;  // left empty: the extractor sniffs the container type
    CHECK(mDataSource != NULL);

    // 1. Create the extractor; sniffing selects the concrete subclass
    //    (e.g. MPEG4Extractor) and parses the container metadata.
    //    (Removed unused locals confidence/dummy/isWidevineStreaming from
    //    the original excerpt.)
    extractor = MediaExtractor::Create(mDataSource,
            mimeType.isEmpty() ? NULL : mimeType.string(),
            mIsStreaming ? 0 : AVNuUtils::get()->getFlags());
    // Robustness fix: creation fails for unrecognized containers — bail out
    // instead of dereferencing NULL below.
    if (extractor == NULL) {
        return UNKNOWN_ERROR;
    }

    // 2. Container-level metadata; mainly checked for kKeyDuration later.
    mFileMeta = extractor->getMetaData();

    int32_t totalBitrate = 0;

    // 3. Number of tracks found while parsing the container's boxes; format
    //    dependent — for MP4 see MPEG4Extractor::readMetaData().
    size_t numtracks = extractor->countTracks();

    // 4. Iterate the tracks and bind the first audio and first video track.
    for (size_t i = 0; i < numtracks; ++i) {
        // 4.1 A track handle by index, e.g. an MPEG4Source wrapping one
        //     "trak" box.
        sp<IMediaSource> track = extractor->getTrack(i);
        // 4.2 Per-track metadata (mime, duration, bitrate, ...), read from
        //     the file and wrapped in a MetaData.
        sp<MetaData> meta = extractor->getTrackMetaData(i);

        const char *mime;
        CHECK(meta->findCString(kKeyMIMEType, &mime));

        // Do the string compare immediately with "mime",
        // we can't assume "mime" would stay valid after another
        // extractor operation, some extractors might modify meta
        // during getTrack() and make it invalid.
        //
        // 4.3 Decide audio vs. video by mime prefix. Each track has its own
        //     MetaData; the file as a whole has one more.
        if (!strncasecmp(mime, "audio/", 6)) {
            if (mAudioTrack.mSource == NULL) {
                mAudioTrack.mIndex = i;
                mAudioTrack.mSource = track;
                mAudioTrack.mPackets =
                    new AnotherPacketSource(mAudioTrack.mSource->getFormat());

                if (!strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_VORBIS)) {
                    mAudioIsVorbis = true;
                } else {
                    mAudioIsVorbis = false;
                }

                if (AVNuUtils::get()->isByteStreamModeEnabled(meta)) {
                    mIsByteMode = true;
                }
            }
        } else if (!strncasecmp(mime, "video/", 6)) {
            if (mVideoTrack.mSource == NULL) {
                mVideoTrack.mIndex = i;
                mVideoTrack.mSource = track;
                mVideoTrack.mPackets =
                    new AnotherPacketSource(mVideoTrack.mSource->getFormat());

                // check if the source requires secure buffers
                int32_t secure;
                if (meta->findInt32(kKeyRequiresSecureBuffers, &secure)
                        && secure) {
                    mIsSecure = true;
                    if (mUIDValid) {
                        extractor->setUID(mUID);
                    }
                }
            }
        }

        // 4.4 Keep every track (audio, video or otherwise) in mSources.
        mSources.push(track);

        int64_t durationUs;
        if (meta->findInt64(kKeyDuration, &durationUs)) {
            if (durationUs > mDurationUs) {
                mDurationUs = durationUs;  // overall duration = longest track
            }
        }

        int32_t bitrate;
        // totalBitrate latches to -1 (unknown) once any track lacks a bitrate.
        if (totalBitrate >= 0 && meta->findInt32(kKeyBitRate, &bitrate)) {
            totalBitrate += bitrate;
        } else {
            totalBitrate = -1;
        }
    }

    if (mSources.size() == 0) {
        ALOGE("b/23705695");
        return UNKNOWN_ERROR;
    }

    mBitrate = totalBitrate;

    ALOGE("%s: END",__FUNCTION__);
    return OK;
}
具體如何分離,還須要根據具體的MediaExtractor對應的格式來看,可是流程都是同樣的,只是具體的實現取決於具體的格式標準解析
關於DataSource和MediaSource
// Call chain from the public entry point down to the concrete extractor:
// sp<IMediaExtractor> MediaExtractor::Create(
//         const sp<DataSource> &source, const char *mime, const uint32_t flags)
// |——sp<IMediaExtractor> MediaExtractorService::makeExtractor(
//         const sp<IDataSource> &remoteSource, const char *mime,
//         const uint32_t extFlags)
//         // the DataSource is converted to an IDataSource via RemoteDataSource::wrap
// |——CreateFromIDataSource(const sp<IDataSource> &source)
// |——sp<MediaExtractor> MediaExtractor::CreateFromService(
//         const sp<DataSource> &source, const char *mime, const uint32_t flags)
// |——new MPEG4Extractor(source);

// static
// Entry point: forwards extractor creation to the remote media extractor
// service over binder.
// NOTE(review): `binder` is obtained outside this excerpt (the service
// lookup lines were trimmed by the author).
sp<IMediaExtractor> MediaExtractor::Create(
        const sp<DataSource> &source, const char *mime, const uint32_t flags) {
    // remote extractor
    sp<IMediaExtractorService> mediaExService(interface_cast<IMediaExtractorService>(binder));
    // Decorates the local DataSource as an IDataSource for the binder call.
    sp<IMediaExtractor> ex = mediaExService->makeExtractor(
            RemoteDataSource::wrap(source), mime, flags);
    return ex;
}

// Wraps a local DataSource into an IDataSource-derived binder object.
sp<IDataSource> RemoteDataSource::wrap(const sp<DataSource> &source) {
    return new RemoteDataSource(source);
}

// Wraps an IDataSource back into a DataSource, adding caching layers
// (TinyCacheSource over CallbackDataSource).
sp<DataSource> DataSource::CreateFromIDataSource(const sp<IDataSource> &source) {
    return new TinyCacheSource(new CallbackDataSource(source));
}
從MediaExtractor中間打印出來的source封裝描述:
ex.sourceDescription(TinyCacheSource(CallbackDataSource(RemoteDataSource(FileSource(Success.mp3)
從上面的層層封裝,能夠看到
1,具體的封裝器如MPEG4Extractor 是操做DataSource,
DataSource 會去調用操做調用IDataSource
2,DataSource能夠理解爲視頻文件的描述(如FileSource)
IDataSource能夠理解爲對DataSource和IMemory之間的映射描述
3,GenericSource(NuPlayer::Source的派生類),會去操做IMediaSource實現對文件的讀寫操做
4,IMediaSource的派生類,對應的是音視頻文件中間 track box「trak」的封裝,具體的MediaExtractor須要實現一個IMediaSource,用來實現對文件音視頻解析出來的track進行封裝,如:
class MPEG4Source : public MediaSource
若是須要重寫一個MediaExtractor,須要:
1,實現一個MediaSource的子類,解析文件,同時描述特定封裝格式的全部track,實例化的時候,就開始瞭解析過程
2,實現一個DataSource子類,如MPEG4DataSource,實質是對傳入的DataSource的封裝與適配
// This custom data source wraps an existing one and satisfies requests // falling entirely within a cached range from the cache while forwarding // all remaining requests to the wrapped datasource. // This is used to cache the full sampletable metadata for a single track, // possibly wrapping multiple times to cover all tracks, i.e. // Each MPEG4DataSource caches the sampletable metadata for a single track.
3,實現一個sniffXXX方法,註冊到DataSource中間,用來讀取文件特定信息,判斷播放文件是否能夠用該自定義的MediaExtractor
4,實現一個MediaExtractor的子類,實現相關函數,用來給nuplayer提供音視頻track的metadata信息
5,按照該封裝格式的標準,解析音視頻box的算法流程(MediaSource功能之一)
MediaSource.h
namespace android {

class MediaBuffer;
class MetaData;

// Track-level source interface: each instance produces the demuxed buffers
// of one media track via read(). Extractor-specific sources (e.g.
// MPEG4Source) derive from this.
struct MediaSource : public BnMediaSource {
    MediaSource();

    // To be called before any other methods on this object, except
    // getFormat().
    virtual status_t start(MetaData *params = NULL) = 0;

    // Any blocking read call returns immediately with a result of NO_INIT.
    // It is an error to call any methods other than start after this call
    // returns. Any buffers the object may be holding onto at the time of
    // the stop() call are released.
    // Also, it is imperative that any buffers output by this object and
    // held onto by callers be released before a call to stop() !!!
    virtual status_t stop() = 0;

    // Returns the format of the data output by this media source.
    virtual sp<MetaData> getFormat() = 0;

    // Returns a new buffer of data. Call blocks until a
    // buffer is available, an error is encountered or the end of the stream
    // is reached.
    // End of stream is signalled by a result of ERROR_END_OF_STREAM.
    // A result of INFO_FORMAT_CHANGED indicates that the format of this
    // MediaSource has changed mid-stream, the client can continue reading
    // but should be prepared for buffers of the new configuration.
    virtual status_t read(
            MediaBuffer **buffer, const ReadOptions *options = NULL) = 0;

    // Causes this source to suspend pulling data from its upstream source
    // until a subsequent read-with-seek. This is currently not supported
    // as such by any source. E.g. MediaCodecSource does not suspend its
    // upstream source, and instead discard upstream data while paused.
    virtual status_t pause() {
        return ERROR_UNSUPPORTED;
    }

    // The consumer of this media source requests that the given buffers
    // are to be returned exclusively in response to read calls.
    // This will be called after a successful start() and before the
    // first read() call.
    // Callee assumes ownership of the buffers if no error is returned.
    virtual status_t setBuffers(const Vector<MediaBuffer *> & /* buffers */) {
        return ERROR_UNSUPPORTED;
    }

protected:
    virtual ~MediaSource();

private:
    // Non-copyable.
    MediaSource(const MediaSource &);
    MediaSource &operator=(const MediaSource &);
};

}  // namespace android
MediaExtractor.h
namespace android {

class DataSource;
class MediaSource;
class MetaData;

// Container-level demuxer interface: exposes the tracks of a media file and
// their metadata. One concrete subclass per container format (e.g.
// MPEG4Extractor for MP4).
class MediaExtractor : public BnMediaExtractor {
public:
    // Creates an extractor remotely in the media extractor service;
    // sniffing the source selects the concrete subclass.
    static sp<IMediaExtractor> Create(
            const sp<DataSource> &source, const char *mime = NULL,
            const uint32_t flags = 0);
    // In-process creation path used by the service itself.
    static sp<MediaExtractor> CreateFromService(
            const sp<DataSource> &source, const char *mime = NULL,
            const uint32_t flags = 0);

    // Number of tracks ("trak" boxes etc.) found in the container.
    virtual size_t countTracks() = 0;
    // Returns the track at |index|, wrapped as an IMediaSource.
    virtual sp<IMediaSource> getTrack(size_t index) = 0;

    enum GetTrackMetaDataFlags {
        kIncludeExtensiveMetaData = 1
    };
    // Per-track metadata (mime, duration, bitrate, ...).
    virtual sp<MetaData> getTrackMetaData(
            size_t index, uint32_t flags = 0) = 0;

    // Return container specific meta-data. The default implementation
    // returns an empty metadata object.
    virtual sp<MetaData> getMetaData();

    enum Flags {
        CAN_SEEK_BACKWARD = 1,  // the "seek 10secs back button"
        CAN_SEEK_FORWARD  = 2,  // the "seek 10secs forward button"
        CAN_PAUSE         = 4,
        CAN_SEEK          = 8,  // the "seek bar"
    };

    // If subclasses do _not_ override this, the default is
    // CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK | CAN_PAUSE
    virtual uint32_t flags() const;

    // for DRM
    void setDrmFlag(bool flag) {
        mIsDrm = flag;
    };
    bool getDrmFlag() {
        return mIsDrm;
    }
    virtual char* getDrmTrackInfo(size_t trackID, int *len) {
        return NULL;
    }
    virtual void setUID(uid_t uid) {
    }

    // Human-readable extractor name, used for debugging/registration.
    virtual const char * name() { return "<unspecified>"; }
    virtual void setExtraFlags(uint32_t flags) {}

protected:
    MediaExtractor();
    virtual ~MediaExtractor() {}

private:
    bool mIsDrm;

    // Non-copyable.
    MediaExtractor(const MediaExtractor &);
    MediaExtractor &operator=(const MediaExtractor &);
};

}  // namespace android