萬能的google能夠找到這個關於protobuf的簡介,從實現說明上來看,並無特別值得說明的地方。對於一個協議或者存儲來講,最爲關心的實際上是協議或存儲的兼容性問題,其它的int變長編碼並無什麼特殊的,由於在這以前,utf-8之類的變長編碼也一樣使用相似的方法來進行編解碼來節省流量。
看了說明以後,所謂的版本兼容須要的也只是爲每一個字段定義一個永久的惟一數字ID,每一個字段的ID定義以後不能修改。在編碼過程當中,在每一個字段(field)前加上一個tag,用於表示接下來字段的編碼;對於讀取方來講,一樣是根據這個ID來肯定接下來的字段如何解釋,若是本身本地不識別這個ID,能夠丟棄。可是這點一樣也早就存在,在IP層和TCP層的options字段存儲,使用的一樣是這樣相似的編碼方式:node
0 1 2 3
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|Ver= 4 |IHL= 8 |Type of Service| Total Length = 576 |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Identification = 111 |Flg=0| Fragment Offset = 0 |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Time = 123 | Protocol = 6 | Header Checksum |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| source address |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| destination address |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Opt. Code = x | Opt. Len.= 3 | option value | Opt. Code = x |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Opt. Len. = 4 | option value | Opt. Code = 1 |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| Opt. Code = y | Opt. Len. = 3 | option value | Opt. Code = 0 |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| data |
\ \
\ \
| data |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| data |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+linux
Example Internet Datagramc++
google官方文檔中對於該問題的說明git
Extending a Protocol Buffergithub
Sooner or later after you release the code that uses your protocol buffer, you will undoubtedly want to "improve" the protocol buffer's definition. If you want your new buffers to be backwards-compatible, and your old buffers to be forward-compatible – and you almost certainly do want this – then there are some rules you need to follow. In the new version of the protocol buffer:數組
you must not change the tag numbers of any existing fields.
you must not add or delete any required fields.
you may delete optional or repeated fields.
you may add new optional or repeated fields but you must use fresh tag numbers (i.e. tag numbers that were never used in this protocol buffer, not even by deleted fields)app
因爲這個工具大體看來的確沒有什麼能夠深刻挖掘的,因此仍是先看下構建過程吧,按照工程的標準文檔執行就好了。
tsecer@protobuf: sh autogen.sh
+ mkdir -p third_party/googletest/m4
+ autoreconf -f -i -Wall,no-obsolete
……
tsecer@protobuf: ./configure --prefix=/data1/harry/work/protobuf-master/protolib/ CXXFLAGS=-g
checking whether to enable maintainer-specific portions of Makefiles... yes
checking build system type... x86_64-unknown-linux-gnu
checking host system type... x86_64-unknown-linux-gnu
checking target system type... x86_64-unknown-linux-gnu
……
tsecer@protobuf: make
……
tsecer@protobuf: ls ./src/protoc
./src/protocless
tsecer@protobuf: g++ -std=c++11 *.cc -lprotobuf -g
tsecer@protobuf:
執行生成的可執行文件
./LD_LIBRARY_PATH=/usr/local/lib/ ./a.out ide
這裏使用的實際上是protobuf自帶的例子,在protobuf-master\examples\addressbook.proto,protobuf-master\examples\add_person.cc
tsecer@protobuf: cat demo.proto
syntax = "proto2";工具
package tutorial;
message Person {
required string name = 1;
required int32 id = 2;
optional string email = 3;
enum PhoneType {
MOBILE = 0;
HOME = 1;
WORK = 2;
}
message PhoneNumber {
required string number = 1;
optional PhoneType type = 2 [default = HOME];
}
repeated PhoneNumber phones = 4;
}
message AddressBook {
repeated Person people = 1;
}
tsecer@protobuf: ../src/protoc -I. --cpp_out=. demo.proto
tsecer@protobuf: ls
demo.pb.cc demo.pb.h demo.proto
tsecer@protobuf:
生成的內容:
因爲number是一個string,因此增長了一些字符串之類的操做接口
// required string number = 1;
bool has_number() const;
void clear_number();
static const int kNumberFieldNumber = 1;
const std::string& number() const;
void set_number(const std::string& value);
void set_number(std::string&& value);
void set_number(const char* value);
void set_number(const char* value, size_t size);
std::string* mutable_number();
std::string* release_number();
void set_allocated_number(std::string* number);
……
// required int32 id = 2;
bool has_id() const;
void clear_id();
static const int kIdFieldNumber = 2;
::PROTOBUF_NAMESPACE_ID::int32 id() const;
void set_id(::PROTOBUF_NAMESPACE_ID::int32 value);
bool Person::MergePartialFromCodedStream(
::PROTOBUF_NAMESPACE_ID::io::CodedInputStream* input) {
#define DO_(EXPRESSION) if (!PROTOBUF_PREDICT_TRUE(EXPRESSION)) goto failure
::PROTOBUF_NAMESPACE_ID::uint32 tag;
// @@protoc_insertion_point(parse_start:tutorial.Person)
for (;;) {
::std::pair<::PROTOBUF_NAMESPACE_ID::uint32, bool> p = input->ReadTagWithCutoffNoLastTag(127u);
tag = p.first;
if (!p.second) goto handle_unusual;
switch (::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::GetTagFieldNumber(tag)) {
// required string name = 1;
case 1: {
if (static_cast< ::PROTOBUF_NAMESPACE_ID::uint8>(tag) == (10 & 0xFF)) {
DO_(::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::ReadString(
input, this->mutable_name()));
::PROTOBUF_NAMESPACE_ID::internal::WireFormat::VerifyUTF8StringNamedField(
this->name().data(), static_cast<int>(this->name().length()),
::PROTOBUF_NAMESPACE_ID::internal::WireFormat::PARSE,
"tutorial.Person.name");
} else {
goto handle_unusual;
}
break;
}
// required int32 id = 2;
case 2: {
if (static_cast< ::PROTOBUF_NAMESPACE_ID::uint8>(tag) == (16 & 0xFF)) {
HasBitSetters::set_has_id(this);
DO_((::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::ReadPrimitive<
::PROTOBUF_NAMESPACE_ID::int32, ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::TYPE_INT32>(
input, &id_)));
} else {
goto handle_unusual;
}
break;
}
// optional string email = 3;
case 3: {
if (static_cast< ::PROTOBUF_NAMESPACE_ID::uint8>(tag) == (26 & 0xFF)) {
DO_(::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::ReadString(
input, this->mutable_email()));
::PROTOBUF_NAMESPACE_ID::internal::WireFormat::VerifyUTF8StringNamedField(
this->email().data(), static_cast<int>(this->email().length()),
::PROTOBUF_NAMESPACE_ID::internal::WireFormat::PARSE,
"tutorial.Person.email");
} else {
goto handle_unusual;
}
break;
}
// repeated .tutorial.Person.PhoneNumber phones = 4;
case 4: {
if (static_cast< ::PROTOBUF_NAMESPACE_ID::uint8>(tag) == (34 & 0xFF)) {
DO_(::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::ReadMessage(
input, add_phones()));
} else {
goto handle_unusual;
}
break;
}
default: {
handle_unusual:
if (tag == 0) {
goto success;
}
DO_(::PROTOBUF_NAMESPACE_ID::internal::WireFormat::SkipField(
input, tag, _internal_metadata_.mutable_unknown_fields()));
break;
}
}
}
success:
// @@protoc_insertion_point(parse_success:tutorial.Person)
return true;
failure:
// @@protoc_insertion_point(parse_failure:tutorial.Person)
return false;
#undef DO_
}
void Person::SerializeWithCachedSizes(
::PROTOBUF_NAMESPACE_ID::io::CodedOutputStream* output) const {
// @@protoc_insertion_point(serialize_start:tutorial.Person)
::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0;
(void) cached_has_bits;
cached_has_bits = _has_bits_[0];
// required string name = 1;
if (cached_has_bits & 0x00000001u) {
::PROTOBUF_NAMESPACE_ID::internal::WireFormat::VerifyUTF8StringNamedField(
this->name().data(), static_cast<int>(this->name().length()),
::PROTOBUF_NAMESPACE_ID::internal::WireFormat::SERIALIZE,
"tutorial.Person.name");
::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteStringMaybeAliased(
1, this->name(), output);
}
// required int32 id = 2;
if (cached_has_bits & 0x00000004u) {
::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt32(2, this->id(), output);
}
// optional string email = 3;
if (cached_has_bits & 0x00000002u) {
::PROTOBUF_NAMESPACE_ID::internal::WireFormat::VerifyUTF8StringNamedField(
this->email().data(), static_cast<int>(this->email().length()),
::PROTOBUF_NAMESPACE_ID::internal::WireFormat::SERIALIZE,
"tutorial.Person.email");
::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteStringMaybeAliased(
3, this->email(), output);
}
// repeated .tutorial.Person.PhoneNumber phones = 4;
for (unsigned int i = 0,
n = static_cast<unsigned int>(this->phones_size()); i < n; i++) {
::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteMessageMaybeToArray(
4,
this->phones(static_cast<int>(i)),
output);
}
if (_internal_metadata_.have_unknown_fields()) {
::PROTOBUF_NAMESPACE_ID::internal::WireFormat::SerializeUnknownFields(
_internal_metadata_.unknown_fields(), output);
}
// @@protoc_insertion_point(serialize_end:tutorial.Person)
}
const char descriptor_table_protodef_demo_2eproto[] =
"\n\ndemo.proto\022\010tutorial\"\333\001\n\006Person\022\014\n\004nam"
"e\030\001 \002(\t\022\n\n\002id\030\002 \002(\005\022\r\n\005email\030\003 \001(\t\022,\n\006ph"
"ones\030\004 \003(\0132\034.tutorial.Person.PhoneNumber"
"\032M\n\013PhoneNumber\022\016\n\006number\030\001 \002(\t\022.\n\004type\030"
"\002 \001(\0162\032.tutorial.Person.PhoneType:\004HOME\""
"+\n\tPhoneType\022\n\n\006MOBILE\020\000\022\010\n\004HOME\020\001\022\010\n\004WO"
"RK\020\002\"/\n\013AddressBook\022 \n\006people\030\001 \003(\0132\020.tu"
"torial.Person"
;
對於每一個文件的生成的protodef格式,一樣是使用proto文件描述,描述文件位於protobuf-master\src\google\protobuf\descriptor.proto,因爲內容比較多,因此只摘錄一些基本內容:
// Describes a complete .proto file.
message FileDescriptorProto {
optional string name = 1; // file name, relative to root of source tree
optional string package = 2; // e.g. "foo", "foo.bar", etc.
// Names of files imported by this file.
repeated string dependency = 3;
// Indexes of the public imported files in the dependency list above.
repeated int32 public_dependency = 10;
// Indexes of the weak imported files in the dependency list.
// For Google-internal migration only. Do not use.
repeated int32 weak_dependency = 11;
// All top-level definitions in this file.
repeated DescriptorProto message_type = 4;
repeated EnumDescriptorProto enum_type = 5;
repeated ServiceDescriptorProto service = 6;
repeated FieldDescriptorProto extension = 7;
optional FileOptions options = 8;
// This field contains optional information about the original source code.
// You may safely remove this entire field without harming runtime
// functionality of the descriptors -- the information is needed only by
// development tools.
optional SourceCodeInfo source_code_info = 9;
// The syntax of the proto file.
// The supported values are "proto2" and "proto3".
optional string syntax = 12;
}
剛看到這個格式的時候我困惑了好久,感受這個是一個爲了讓顯示更加友好的文本格式,可是後來才發現,這個只是碰巧有一些特殊的可現實字符而已,本質上仍是按照protobuf的編碼格式。例如前面的描述文件,開始的兩個換行符對應的二進制爲1010,最低三個bit爲類型
https://developers.google.com/protocol-buffers/docs/encoding
The available wire types are as follows:
Type Meaning Used For
0 Varint int32, int64, uint32, uint64, sint32, sint64, bool, enum
1 64-bit fixed64, sfixed64, double
2 Length-delimited string, bytes, embedded messages, packed repeated fields
3 Start group groups (deprecated)
4 End group groups (deprecated)
5 32-bit fixed32, sfixed32, float
Each key in the streamed message is a varint with the value (field_number << 3) | wire_type – in other words, the last three bits of the number store the wire type.
因此開始的\n表示fieldnum爲1,類型爲2,也就是後面有一個長度分隔符,接下來的1010表示字符串的長度爲10,也就是字符串"demo.proto"的長度。
對每一個字符的轉義處理:protobuf-master\src\google\protobuf\stubs\strutil.cc
// ----------------------------------------------------------------------
// Escapes 'src' using C-style escape sequences, and appends the escaped string
// to 'dest'. This version is faster than calling CEscapeInternal as it computes
// the required space using a lookup table, and also does not do any special
// handling for Hex or UTF-8 characters.
// ----------------------------------------------------------------------
void CEscapeAndAppend(StringPiece src, string* dest) {
size_t escaped_len = CEscapedLength(src);
if (escaped_len == src.size()) {
dest->append(src.data(), src.size());
return;
}
size_t cur_dest_len = dest->size();
dest->resize(cur_dest_len + escaped_len);
char* append_ptr = &(*dest)[cur_dest_len];
for (int i = 0; i < src.size(); ++i) {
unsigned char c = static_cast<unsigned char>(src[i]);
switch (c) {
case '\n': *append_ptr++ = '\\'; *append_ptr++ = 'n'; break;
case '\r': *append_ptr++ = '\\'; *append_ptr++ = 'r'; break;
case '\t': *append_ptr++ = '\\'; *append_ptr++ = 't'; break;
case '\"': *append_ptr++ = '\\'; *append_ptr++ = '\"'; break;
case '\'': *append_ptr++ = '\\'; *append_ptr++ = '\''; break;
case '\\': *append_ptr++ = '\\'; *append_ptr++ = '\\'; break;
default:
if (!isprint(c)) {
*append_ptr++ = '\\';
*append_ptr++ = '0' + c / 64;
*append_ptr++ = '0' + (c % 64) / 8;
*append_ptr++ = '0' + c % 8;
} else {
*append_ptr++ = c;
}
break;
}
}
}
簡單來看,就是長度加上起始地址
protobuf-master\src\google\protobuf\repeated_field.h
int current_size_;
int total_size_;
struct Rep {
Arena* arena;
Element elements[1];
};
// We can not use sizeof(Rep) - sizeof(Element) due to the trailing padding on
// the struct. We can not use sizeof(Arena*) as well because there might be
// a "gap" after the field arena and before the field elements (e.g., when
// Element is double and pointer is 32bit).
static const size_t kRepHeaderSize;
// We reuse the Rep* for an Arena* when total_size == 0, to avoid having to do
// an allocation in the constructor when we have an Arena.
union Pointer {
Pointer(Arena* a) : arena(a) {}
Arena* arena; // When total_size_ == 0.
Element* elements; // When total_size_ != 0, this is Rep->elements of Rep.
} ptr_;
Element* elements() const {
GOOGLE_DCHECK_GT(total_size_, 0);
return ptr_.elements;
}
至關於realloc同樣,申請新的內存空間,而後整個數組進行移動。如此說來,保存一個對象的指針豈不是危險的?
template <typename Element>
inline void RepeatedField<Element>::Add(const Element& value) {
if (current_size_ == total_size_) Reserve(total_size_ + 1);
elements()[current_size_++] = value;
}
// Avoid inlining of Reserve(): new, copy, and delete[] lead to a significant
// amount of code bloat.
template <typename Element>
void RepeatedField<Element>::Reserve(int new_size) {
if (total_size_ >= new_size) return;
Rep* old_rep = total_size_ > 0 ? rep() : NULL;
Rep* new_rep;
Arena* arena = GetArenaNoVirtual();
new_size = std::max(internal::kMinRepeatedFieldAllocationSize,
std::max(total_size_ * 2, new_size));
GOOGLE_DCHECK_LE(
static_cast<size_t>(new_size),
(std::numeric_limits<size_t>::max() - kRepHeaderSize) / sizeof(Element))
<< "Requested size is too large to fit into size_t.";
size_t bytes = kRepHeaderSize + sizeof(Element) * static_cast<size_t>(new_size);
if (arena == NULL) {
new_rep = static_cast<Rep*>(::operator new(bytes));
} else {
new_rep = reinterpret_cast<Rep*>(Arena::CreateArray<char>(arena, bytes));
}
new_rep->arena = arena;
int old_total_size = total_size_;
total_size_ = new_size;
ptr_.elements = new_rep->elements;
// Invoke placement-new on newly allocated elements. We shouldn't have to do
// this, since Element is supposed to be POD, but a previous version of this
// code allocated storage with "new Element[size]" and some code uses
// RepeatedField with non-POD types, relying on constructor invocation. If
// Element has a trivial constructor (e.g., int32), gcc (tested with -O2)
// completely removes this loop because the loop body is empty, so this has no
// effect unless its side-effects are required for correctness.
// Note that we do this before MoveArray() below because Element's copy
// assignment implementation will want an initialized instance first.
Element* e = &elements()[0];
Element* limit = e + total_size_;
for (; e < limit; e++) {
new (e) Element;
}
if (current_size_ > 0) {
MoveArray(&elements()[0], old_rep->elements, current_size_);
}
// Likewise, we need to invoke destructors on the old array.
InternalDeallocate(old_rep, old_total_size);
}
能夠參考paxos中的使用方法phxpaxos-master\src\algorithm\instance.cpp:
message Header
{
required uint64 gid = 1;
required uint64 rid = 2;
required int32 cmdid = 3;
optional int32 version = 4;
};
message PaxosMsg
{
required int32 MsgType = 1;
optional uint64 InstanceID = 2;
optional uint64 NodeID = 3;
optional uint64 ProposalID = 4;
optional uint64 ProposalNodeID = 5;
optional bytes Value = 6;
optional uint64 PreAcceptID = 7;
optional uint64 PreAcceptNodeID = 8;
optional uint64 RejectByPromiseID = 9;
optional uint64 NowInstanceID = 10;
optional uint64 MinChosenInstanceID = 11;
optional uint32 LastChecksum = 12;
optional uint32 Flag = 13;
optional bytes SystemVariables = 14;
optional bytes MasterVariables = 15;
};
void Instance :: OnReceive(const std::string & sBuffer)
{
BP->GetInstanceBP()->OnReceive();
if (sBuffer.size() <= 6)
{
PLGErr("buffer size %zu too short", sBuffer.size());
return;
}
Header oHeader;
size_t iBodyStartPos = 0;
size_t iBodyLen = 0;
int ret = Base::UnPackBaseMsg(sBuffer, oHeader, iBodyStartPos, iBodyLen);
if (ret != 0)
{
return;
}
int iCmd = oHeader.cmdid();
if (iCmd == MsgCmd_PaxosMsg)
{
if (m_oCheckpointMgr.InAskforcheckpointMode())
{
PLGImp("in ask for checkpoint mode, ignord paxosmsg");
return;
}
PaxosMsg oPaxosMsg;
bool bSucc = oPaxosMsg.ParseFromArray(sBuffer.data() + iBodyStartPos, iBodyLen);
if (!bSucc)
{
BP->GetInstanceBP()->OnReceiveParseError();
PLGErr("PaxosMsg.ParseFromArray fail, skip this msg");
return;
}
if (!ReceiveMsgHeaderCheck(oHeader, oPaxosMsg.nodeid()))
{
return;
}
OnReceivePaxosMsg(oPaxosMsg);
}
else if (iCmd == MsgCmd_CheckpointMsg)
{
CheckpointMsg oCheckpointMsg;
bool bSucc = oCheckpointMsg.ParseFromArray(sBuffer.data() + iBodyStartPos, iBodyLen);
if (!bSucc)
{
BP->GetInstanceBP()->OnReceiveParseError();
PLGErr("PaxosMsg.ParseFromArray fail, skip this msg");
return;
}
if (!ReceiveMsgHeaderCheck(oHeader, oCheckpointMsg.nodeid()))
{
return;
}
OnReceiveCheckpointMsg(oCheckpointMsg);
}
}
簡單來講,就是不推薦、不容許https://stackoverflow.com/questions/3897229/extending-protobuf-with-my-own-methods