Protocol Buffers(3):閱讀一個二進制文件

博客:blog.shinelee.me | 博客園 | CSDN

這篇文章中,我們將定義一個相對複雜的數據結構,直接分析其序列化後的二進制文件。

Proto文件

編寫addressbook.proto文件,在官方例子上略做修改,增加了float字段,以分析浮點數的存儲方式。

syntax = "proto2";

package tutorial;

// One contact entry. name and id are required; email is optional.
message Person {
  required string name = 1;
  required int32 id = 2;
  optional string email = 3;

  // Kind of phone line a PhoneNumber refers to.
  enum PhoneType {
    MOBILE = 0;
    HOME = 1;
    WORK = 2;
  }

  // A phone number plus its kind; type falls back to HOME when not set.
  message PhoneNumber {
    required string number = 1;
    optional PhoneType type = 2 [default = HOME];
  }

  // Zero or more phone numbers, each encoded as a nested message.
  repeated PhoneNumber phones = 4;
  // Float samples stored with the packed encoding. Field number 100 is
  // large enough that the field key needs a two-byte varint.
  repeated float weight_recent_months = 100 [packed = true];
}

// Top-level container: a list of Person entries.
message AddressBook {
  repeated Person people = 1;
}

生成編解碼文件,addressbook.pb.cc和addressbook.pb.h。

protoc.exe addressbook.proto --cpp_out=.

序列化

編寫如下代碼,將address_book對象序列化,保存到二進制文件address_book.bin。

// Builds an AddressBook holding a single Person (two phone numbers and three
// float samples) and writes its serialized form to ./address_book.bin.
//
// Returns 0 on success and 1 when the output file cannot be opened or the
// serialization / write fails, so a missing or truncated file is never
// reported as success.
int main()
{
    tutorial::AddressBook address_book;

    tutorial::Person* person = address_book.add_people();
    person->set_id(1);
    person->set_name("Jack");
    person->set_email("Jack@qq.com");

    tutorial::Person::PhoneNumber* phone_number = person->add_phones();
    phone_number->set_number("123456");
    phone_number->set_type(tutorial::Person::HOME);

    phone_number = person->add_phones();
    phone_number->set_number("234567");
    phone_number->set_type(tutorial::Person::MOBILE);

    person->add_weight_recent_months(50);
    person->add_weight_recent_months(52);
    person->add_weight_recent_months(54);

    // Binary mode keeps the bytes untouched on platforms that would otherwise
    // translate line endings.
    fstream fw("./address_book.bin", ios::out | ios::binary);
    if (!fw) {
        return 1;  // could not open the output file
    }

    const bool serialized = address_book.SerializePartialToOstream(&fw);
    fw.close();

    // close() flushes; a failed flush sets the stream's fail state.
    if (!serialized || !fw) {
        return 1;
    }

    return 0;
}

二進制文件address_book.bin一共有62個字節,內容如下:
address_book bin file

二進制文件解析

由前面的文章可知,每個field的key = (field_number << 3) | wire_type,都通過varint表示。

message AddressBook的第一個字段爲Person people,Person也是一個message,下面逐個字節地進行解析。

0a    // (1 << 3) + 2,1爲people的field_number,2爲embedded message對應的wire type
3c    // 0x3c = 60,表示接下來60個字節爲Person people的數據

// 下面進入到 message Person
0a    // (1 << 3) + 2,Person的第一個字段name field_number=1,2爲string對應的wire type
04    // name字段的字符串長度爲4
4a 61 63 6b    // "Jack" 的ascii編碼

10    // (2 << 3) + 0,字段id field_number=2,0爲int32對應的wire type
01    // id爲1

1a    // (3 << 3) + 2,字段email field_number=3,2爲string對應的wire type
0b    // 0x0b = 11 email字段的字符串長度爲11
4a 61 63 6b 40 71 71 2e 63 6f 6d        // "Jack@qq.com"

    //第1個PhoneNumber,嵌套message
    22    // (4 << 3) + 2,phones字段,field_number=4,2爲embedded message對應的wire type
    0a    // 接下來10個字節爲PhoneNumber的數據
    0a    // (1 << 3) + 2, message PhoneNumber的第一個字段number,2爲string對應的wire type
    06    // number字段的字符串長度爲6
    31 32 33 34 35 36    // "123456"
    10   // (2 << 3) + 0,PhoneType type字段,0爲enum對應的wire type
    01   // HOME,enum被視爲整數

    // 第2個PhoneNumber,嵌套message
    22 0a 0a 06 32 33 34 35 36 37 10 00  //信息解讀同上,最後的00爲MOBILE

a2 06   // 1010 0010 0000 0110 varint方式,weight_recent_months的key
        //  去掉每個字節的最高位得 010 0010 和 000 0110;varint低位組在前(little-endian),重排爲 000 0110 010 0010 = 802
        // (100 << 3) + 2,100爲weight_recent_months的field number
        //  2爲 packed repeated field的wire type
0c    // 後面12個字節爲packed float的數據,每4個字節一個
00 00 48 42 // float 50
00 00 50 42 // float 52
00 00 58 42 // float 54

需要注意的是,repeated後面接的字段如果是個message,比如上面的PhoneNumber,有幾個PhoneNumber,編碼時其key就會出現幾次;如果接的是數值型的字段,且以packed = true壓縮存儲時,只會出現1個key;如果不以壓縮方式存儲,其key也會出現多次。在proto3中,默認以壓縮方式進行存儲,proto2中則需要顯式地聲明。

至此,二進制文件已經分析完畢,現在再去看解碼代碼,就so easy了。

反序列化

這裏只貼上message Person對應的解碼代碼,可以看到其中遇到嵌套message PhoneNumber時,會去調用PhoneNumber的解碼代碼。

// Reads tag/value pairs from `input` in a loop and stores each recognised
// field into this Person; anything else goes through the handle_unusual path.
// Returns true via the `success` label and false via the `failure` label
// (reached whenever a DO_-wrapped read fails). This is protoc-generated code.
bool Person::MergePartialFromCodedStream(
    ::google::protobuf::io::CodedInputStream* input) {
// DO_ evaluates EXPRESSION and jumps to `failure` when it is false.
#define DO_(EXPRESSION) if (!PROTOBUF_PREDICT_TRUE(EXPRESSION)) goto failure
  ::google::protobuf::uint32 tag;
  // @@protoc_insertion_point(parse_start:tutorial.Person)
  for (;;) {
    // NOTE(review): 16383 is the largest value that fits a two-byte varint;
    // presumably p.second reports whether the tag was within that cutoff.
    // Confirm against the CodedInputStream documentation.
    ::std::pair<::google::protobuf::uint32, bool> p = input->ReadTagWithCutoffNoLastTag(16383u);
    tag = p.first;
    if (!p.second) goto handle_unusual;
    // Dispatch on the field number. Each case then compares only the low byte
    // of the tag with the expected key, (field_number << 3) | wire_type.
    switch (::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) {
      // required string name = 1;
      case 1: {
        // 10 == (1 << 3) | 2: field 1 with wire type 2.
        if (static_cast< ::google::protobuf::uint8>(tag) == (10 & 0xFF)) {
          DO_(::google::protobuf::internal::WireFormatLite::ReadString(
                input, this->mutable_name()));
          // The result of the UTF-8 verification is not checked here, so it
          // does not by itself abort the parse.
          ::google::protobuf::internal::WireFormat::VerifyUTF8StringNamedField(
            this->name().data(), static_cast<int>(this->name().length()),
            ::google::protobuf::internal::WireFormat::PARSE,
            "tutorial.Person.name");
        } else {
          goto handle_unusual;
        }
        break;
      }

      // required int32 id = 2;
      case 2: {
        // 16 == (2 << 3) | 0: field 2 with wire type 0.
        if (static_cast< ::google::protobuf::uint8>(tag) == (16 & 0xFF)) {
          // set_has_id is called before the value is read into id_.
          HasBitSetters::set_has_id(this);
          DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive<
                   ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>(
                 input, &id_)));
        } else {
          goto handle_unusual;
        }
        break;
      }

      // optional string email = 3;
      case 3: {
        // 26 == (3 << 3) | 2: field 3 with wire type 2.
        if (static_cast< ::google::protobuf::uint8>(tag) == (26 & 0xFF)) {
          DO_(::google::protobuf::internal::WireFormatLite::ReadString(
                input, this->mutable_email()));
          ::google::protobuf::internal::WireFormat::VerifyUTF8StringNamedField(
            this->email().data(), static_cast<int>(this->email().length()),
            ::google::protobuf::internal::WireFormat::PARSE,
            "tutorial.Person.email");
        } else {
          goto handle_unusual;
        }
        break;
      }

      // repeated .tutorial.Person.PhoneNumber phones = 4;
      case 4: {
        // 34 == (4 << 3) | 2. Every occurrence appends a new PhoneNumber via
        // add_phones() and reads the nested message into it.
        if (static_cast< ::google::protobuf::uint8>(tag) == (34 & 0xFF)) {
          DO_(::google::protobuf::internal::WireFormatLite::ReadMessage(
                input, add_phones()));
        } else {
          goto handle_unusual;
        }
        break;
      }

      // repeated float weight_recent_months = 100 [packed = true];
      case 100: {
        // 802 == (100 << 3) | 2: the packed form, all floats under one key.
        if (static_cast< ::google::protobuf::uint8>(tag) == (802 & 0xFF)) {
          DO_((::google::protobuf::internal::WireFormatLite::ReadPackedPrimitive<
                   float, ::google::protobuf::internal::WireFormatLite::TYPE_FLOAT>(
                 input, this->mutable_weight_recent_months())));
        // 805 == (100 << 3) | 5: the non-packed form is accepted as well.
        // NOTE(review): the arguments 2 and 802u look like the encoded tag
        // size and the packed tag; confirm against WireFormatLite.
        } else if (static_cast< ::google::protobuf::uint8>(tag) == (805 & 0xFF)) {
          DO_((::google::protobuf::internal::WireFormatLite::ReadRepeatedPrimitiveNoInline<
                   float, ::google::protobuf::internal::WireFormatLite::TYPE_FLOAT>(
                 2, 802u, input, this->mutable_weight_recent_months())));
        } else {
          goto handle_unusual;
        }
        break;
      }

      default: {
      // Shared landing point for unknown field numbers, unexpected wire types
      // and tags for which p.second was false.
      handle_unusual:
        // A zero tag ends the loop successfully.
        if (tag == 0) {
          goto success;
        }
        // Otherwise the field is handed to SkipField together with this
        // message's unknown-field set.
        DO_(::google::protobuf::internal::WireFormat::SkipField(
              input, tag, _internal_metadata_.mutable_unknown_fields()));
        break;
      }
    }
  }
success:
  // @@protoc_insertion_point(parse_success:tutorial.Person)
  return true;
failure:
  // @@protoc_insertion_point(parse_failure:tutorial.Person)
  return false;
#undef DO_
}

以上。

參考

相關文章
相關標籤/搜索