一步一步從原理跟我學郵件收取及發送 10.四句代碼說清base64

時間 2019-11-06

標籤一步原理郵件收取發送代碼 base64 base 简体版

原文原文鏈接

    通過前幾篇的文章，你們應該都能預感到必定要講解 base64 函數的內容了。是的，立刻要到程序登陸的代碼，base64 是必需要實現的。

    base64 很早之前我就接觸了，在項目中也很喜歡用。但每換一個新語言我總是很懼怕，很排斥用它。這主要是緣於曾經的經歷：多年前 base64 還沒有這樣普及，爲了在 java 中使用 base64 找了不少的代碼，雖然 jdk 中是有，但那是 sun 專用的，在網上找倒是不少，但要自己改造一下是非常的困難（實際上是 java 的語言特性導致很複雜，base64 本身還是很簡單的）。因爲找了不少的文檔一直都似懂非懂，因爲不懂嘛，所以心理上就有排斥。直到我接了一個 xmpp 的項目必須要在純 C 中實現 base64 時仔細研究了一下，其實是很簡單的。說實在的關於 base64 的文章和前面提到的多線程文章一樣也挺汗牛充棟的，爲什麼我不講解多線程，要特別講解 base64 呢？

    由於我以爲網上的 base64 講解文章基本上都是沒用的！因此我必定要再寫一篇。而我這篇，我保證您必定能看得懂！

    首先我先給出完整代碼，你們也能夠到 github 地址去下載：
https://github.com/clqsrc/c_lib_lstring/tree/master/base64

核心的代碼直接就來自博客園的博友，在 rfc 文檔和其他一些地方也很容易找到。我直接再貼一個 C++ 的版本，這些代碼我都默認摺疊了，因爲先看代碼你是學不會的。

//算法來自 http://www.cnblogs.com/IwAdream/p/6088283.html, 加了我本身的註釋 
//基本上就是靠移位算法將 3 個字節變成 4 個字節,或者將 4 個字節變成 三個字節
//能夠看 https://zh.wikipedia.org/wiki/Base64 中的表格圖示 
//圖片能夠在本源碼中附帶的 base64.png 中看到[若是轉載的網友也轉載有圖片的話就會有] 
//其實一張圖就能明白它的原理 
//把變量提早以便更多編譯器支持 

#ifndef _BASE64_H_
#define _BASE64_H_

#include <stdio.h>
#include <stdint.h> //clq 這個其實也不是必須的 
#include <string.h>
#include <malloc.h> //clq 能夠不用,有時會衝突 
 
/* Base64 alphabet: entry i is the character that encodes the 6-bit value i.
 * The string literal's implicit terminator provides the trailing '\0' at
 * index 64, so the array still holds 65 bytes. */
char base64_table[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789+/";
 
/*
 * Replace, in place, the first `len` bytes of `in_block` (each used as an
 * index into base64_table) with the corresponding alphabet character, then
 * append '=' padding directly after them: one '=' when len % 4 == 3, two
 * when len % 4 == 2, none otherwise. The caller's buffer must therefore
 * have room for up to two bytes past `len`.
 */
void base64_map(uint8_t *in_block, int len) {
    uint8_t *cursor = in_block;
    int remaining = len;

    while (remaining-- > 0) {
        *cursor = base64_table[*cursor];
        cursor++;
    }

    switch (len % 4) {
    case 3:
        in_block[len] = '=';
        break;
    case 2:
        in_block[len] = '=';
        in_block[len + 1] = '=';
        break;
    default:
        break;
    }
}


/*
 * Convert one 4-character Base64 group, in place, from alphabet characters
 * to their 6-bit values: 'A'-'Z' -> 0-25, 'a'-'z' -> 26-51, '+' -> 62,
 * '/' -> 63 and the padding character '=' -> 0. Every other character is
 * handled as if it were a digit ('0'-'9' -> 52-61); nothing is validated.
 */
void  base64_unmap(char *in_block) {
    char *p;
    char *const end = in_block + 4;

    for (p = in_block; p != end; ++p) {
        if (*p >= 'A' && *p <= 'Z') {
            *p -= 'A';
        } else if (*p >= 'a' && *p <= 'z') {
            *p -= 'a';
            *p += 26;
        } else if (*p == '+') {
            *p = 62;
        } else if (*p == '/') {
            *p = 63;
        } else if (*p == '=') {
            *p = 0;
        } else {
            /* digit branch, also the fallback for anything unrecognised */
            *p -= '0';
            *p += 52;
        }
    }
}
 
/*
 * Encode `inlen` bytes starting at `in` as Base64 text written to `out`.
 *
 * Every started 3-byte input group produces 4 output characters, so `out`
 * must have room for 4 * ((inlen + 2) / 3) bytes. The output is padded with
 * '=' by base64_map() but is NOT NUL-terminated.
 *
 * Returns the number of characters written (always a multiple of 4), or 0
 * when `inlen` is zero or negative, in which case `out` is left untouched.
 */
int _base64_encode(char *in, int inlen, uint8_t *out) {
    const char *in_block = in;
    uint8_t *out_block = out;
    /* Unsigned on purpose: shifting a negative plain `char` (any byte
     * >= 0x80 where char is signed) to the left is undefined behaviour. */
    unsigned char temp[3];
    int outlen = 0; /* number of encoded characters produced so far */
    int mapped_len;
    int i;

    /* Nothing to encode. Without this guard the length formula below would
     * make base64_map() translate a byte of `out` that was never written. */
    if (inlen <= 0)
        return 0;

    for (i = 0; i < inlen; i += 3) {
        int chunk = inlen - i < 3 ? inlen - i : 3;

        /* Zero-fill so that a short final group is padded with 0 bits. */
        memset(temp, 0, sizeof temp);
        memcpy(temp, in_block, (size_t)chunk);

        /* The four core lines: 3 x 8 bits are regrouped into 4 x 6 bits. */
        out_block[0] = (temp[0] >> 2) & 0x3f;
        out_block[1] = ((temp[0] << 4) & 0x30) | ((temp[1] >> 4) & 0x0f);
        out_block[2] = ((temp[1] << 2) & 0x3c) | ((temp[2] >> 6) & 0x03);
        out_block[3] = temp[2] & 0x3f;

        out_block += 4;
        in_block += 3;
        outlen += 4;
    }

    /* Count of 6-bit values that carry real data: 4 per complete group plus
     * (leftover bytes + 1) for a partial one. base64_map() turns exactly
     * these into alphabet characters and fills the rest of the last group
     * with '='. */
    mapped_len = (inlen / 3) * 4 + (inlen % 3 ? inlen % 3 + 1 : 0);
    base64_map(out, mapped_len);

    return outlen;
}
 
/*
 * Decode `inlen` Base64 characters starting at `in` into `out`.
 *
 * Input is consumed in groups of 4 characters and 3 bytes are stored to
 * `out` for every group, including a padded final one, so `out` must have
 * room for 3 * ((inlen + 3) / 4) bytes. Characters are not validated.
 *
 * A final group shorter than 4 characters is treated as if it had been
 * padded with '=' instead of reading past the end of `in`.
 *
 * Returns the number of decoded bytes. Returns 0 when `inlen` is zero or
 * negative, and also when any group starts with '='.
 */
int _base64_decode(char *in, int inlen, uint8_t *out) {
    char *in_block = in;
    uint8_t *out_block = out;
    char temp[4];
    int outlen = 0;   /* number of decoded bytes */
    int tail_pad = 0; /* '=' count (real + implied) in a short final group */
    int i;

    /* Empty input: the padding checks below would index before `in`. */
    if (inlen <= 0)
        return 0;

    for (i = 0; i < inlen; i += 4) {
        int avail = inlen - i < 4 ? inlen - i : 4;
        int k;

        if (*in_block == '=')
            return 0;

        /* Copy only what is really there; missing characters become '='. */
        memset(temp, '=', sizeof temp);
        memcpy(temp, in_block, (size_t)avail);

        if (avail < 4) {
            /* Only possible for the last group. */
            for (k = 3; k >= 1 && temp[k] == '='; --k)
                ++tail_pad;
        }

        base64_unmap(temp);

        /* Regroup 4 x 6 bits into 3 x 8 bits. The casts keep the shifts on
         * non-negative values even if unmapping produced a negative char. */
        out_block[0] = (((uint8_t)temp[0] << 2) & 0xfc) | (((uint8_t)temp[1] >> 4) & 3);
        out_block[1] = (((uint8_t)temp[1] << 4) & 0xf0) | (((uint8_t)temp[2] >> 2) & 0xf);
        out_block[2] = (((uint8_t)temp[2] << 6) & 0xc0) | (((uint8_t)temp[3]) & 0x3f);

        out_block += 3;
        in_block += 4;
        outlen += 3; /* provisional: corrected for padding after the loop */
    }

    if (inlen % 4 == 0) {
        /* Each '=' in the last two positions stands for one padding byte.
         * inlen >= 4 here, so both indexes are inside the input. */
        if (in[inlen - 1] == '=') outlen -= 1;
        if (in[inlen - 2] == '=') outlen -= 1;
    } else {
        outlen -= tail_pad;
    }

    return outlen;
}
 
 
/*
int main2() {
 
    char cipher_text[64];
    int i = 0;
    
    while(scanf("%s", cipher_text) != EOF) {
        printf("%s\n", cipher_text);
 
        uint8_t *tran_cipher = (uint8_t *)malloc(sizeof(uint8_t) * 64);
        memset(tran_cipher, 0, sizeof(uint8_t) * 64);
 
        #define ENCODE
        #define DECODE
        #ifdef ENCODE
        printf("----------------ENCODE-----------------");
        base64_encode(cipher_text, strlen(cipher_text), tran_cipher);
        int len = (strlen(cipher_text) * 4 - 1) / 3 + 1;
        len = len % 4 == 3 ? len + 1 : len + 2;
        for(i = 0; i < len ; ++i)
            printf("%c", tran_cipher[i]);
        printf("\n");
 
        #endif // ENCODE
 
        #ifdef DECODE
        printf("----------------DECODE-----------------");
        base64_decode(cipher_text, strlen(cipher_text), tran_cipher);
        len = strlen(cipher_text);
        int n = len;
        while(cipher_text[--n] == '=')
            ;
        if(n == len-2)
            len = (len >> 2) * 3 - 1;
        else if(n == len-3)
            len = (len >> 2) * 3 - 2;
        else if(n == len-1)
            len = (len >> 2) * 3;
 
        for(i = 0; i < len; ++i)
            printf("%c", tran_cipher[i]);
        printf("\n");
        #endif // DECODE
    }
    return 0;
}

*/


#endif

View Code

/* 
   base64.cpp and base64.h

   Copyright (C) 2004-2008 René Nyffenegger

   This source code is provided 'as-is', without any express or implied
   warranty. In no event will the author be held liable for any damages
   arising from the use of this software.

   Permission is granted to anyone to use this software for any purpose,
   including commercial applications, and to alter it and redistribute it
   freely, subject to the following restrictions:

   1. The origin of this source code must not be misrepresented; you must not
      claim that you wrote the original source code. If you use this source code
      in a product, an acknowledgment in the product documentation would be
      appreciated but is not required.

   2. Altered source versions must be plainly marked as such, and must not be
      misrepresented as being the original source code.

   3. This notice may not be removed or altered from any source distribution.

   René Nyffenegger rene.nyffenegger@adp-gmbh.ch

*/

#include "stdafx.h"

#include "base64.h"
#include <iostream>

// The 64-character Base64 alphabet; a character's position in the string is
// the 6-bit value it encodes.
static const std::string base64_chars(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789+/");


// True when `c` may appear in the data part of Base64 text: '+', '/', or any
// character isalnum() accepts. The padding character '=' is not included.
static inline bool is_base64(unsigned char c) {
  if (c == '+' || c == '/')
    return true;
  return isalnum(c) != 0;
}

// Split three octets into four 6-bit values (each stored in its own byte).
static void base64_split_octets(const unsigned char octets[3], unsigned char sextets[4]) {
  sextets[0] = octets[0] >> 2;
  sextets[1] = ((octets[0] & 0x03) << 4) | (octets[1] >> 4);
  sextets[2] = ((octets[1] & 0x0f) << 2) | (octets[2] >> 6);
  sextets[3] = octets[2] & 0x3f;
}

// Encode `in_len` bytes starting at `bytes_to_encode` as Base64 text.
// Complete 3-byte groups become 4 characters each; a trailing group of 1 or
// 2 bytes is zero-filled, encoded to 2 or 3 characters, and completed with
// '=' so that the result length is always a multiple of 4.
std::string base64_encode(unsigned char const* bytes_to_encode, unsigned int in_len) {
  std::string encoded;
  unsigned char octets[3];
  unsigned char sextets[4];

  const unsigned int full_groups = in_len / 3;
  const unsigned int leftover = in_len % 3;
  unsigned char const* src = bytes_to_encode;

  for (unsigned int g = 0; g < full_groups; ++g, src += 3) {
    octets[0] = src[0];
    octets[1] = src[1];
    octets[2] = src[2];
    base64_split_octets(octets, sextets);
    for (int k = 0; k < 4; ++k)
      encoded += base64_chars[sextets[k]];
  }

  if (leftover != 0) {
    octets[0] = octets[1] = octets[2] = '\0';
    for (unsigned int k = 0; k < leftover; ++k)
      octets[k] = src[k];

    base64_split_octets(octets, sextets);

    // leftover bytes carry data into leftover + 1 characters ...
    for (unsigned int k = 0; k < leftover + 1; ++k)
      encoded += base64_chars[sextets[k]];

    // ... and the rest of the group is padding.
    encoded.append(3 - leftover, '=');
  }

  return encoded;
}

// Translate four alphabet characters (in place) into their positions in
// base64_chars, then pack those four values into three octets.
static void base64_quad_to_octets(unsigned char quad[4], unsigned char octets[3]) {
  for (int k = 0; k < 4; ++k)
    quad[k] = base64_chars.find(quad[k]);

  octets[0] = (quad[0] << 2) + ((quad[1] & 0x30) >> 4);
  octets[1] = ((quad[1] & 0xf) << 4) + ((quad[2] & 0x3c) >> 2);
  octets[2] = ((quad[2] & 0x3) << 6) + quad[3];
}

// Decode Base64 text. Reading stops at the end of the string, at the first
// '=' or at the first character is_base64() rejects, whichever comes first.
// Every complete group of 4 characters yields 3 bytes; a trailing partial
// group of n characters (n = 1..3) yields n - 1 bytes.
std::string base64_decode(std::string const& encoded_string) 
{
  std::string decoded;
  unsigned char quad[4];
  unsigned char octets[3];
  int filled = 0;
  int remaining = encoded_string.size();

  for (int pos = 0; remaining != 0; ++pos, --remaining) {
    const char ch = encoded_string[pos];
    if (ch == '=' || !is_base64(ch))
      break;

    quad[filled++] = ch;
    if (filled < 4)
      continue;

    base64_quad_to_octets(quad, octets);
    decoded.append(octets, octets + 3);
    filled = 0;
  }

  if (filled != 0) {
    // Zero-fill the unused slots; only the first filled - 1 octets, which
    // depend solely on the real characters, are emitted.
    for (int k = filled; k < 4; ++k)
      quad[k] = 0;

    base64_quad_to_octets(quad, octets);
    decoded.append(octets, octets + (filled - 1));
  }

  return decoded;
}

View Code

    java 裏一直到 jdk1.6 才有正式的 base64 函數，ios 的開發工具 xcode 的支持也很晚，搞得到處都是各種 base64 的講解和第三方代碼，甚至我最喜歡的 delphi 裏都有不同的控件實現了不同的版本。給人的印象就成了這東西一定很難啊，沒事別去碰。
    因此我一語道破天機的話，接觸過 base64 的網友必定會跌破一地的眼鏡：其實 base64 的核心算法只有 4 行代碼！是的就是隻須要 4 行！其餘的代碼不過是補充字節不足，找數字和字符串進行映射轉換而已。

base64 其實就是將 3 個字節的字符串變成 4 個字節的字符串。就這麼簡單。然後再規定了一下字符串不夠 3 個字節的話怎樣補充而已。至於超過 3 字節的字符串嘛，是個人都明白：分割成多個 3 字節的循環就可以了。至於解碼，反過來把 4 字節的結果變成 3 個字節的原文就好了唄，所以根本不用講解解碼，我們講解編碼過程就行了。

    講解編碼過程時先說下原因大家就更好理解，base64 產生的原因是要將文件中的二進制內容以可見的字符串發送出去（或者是在類似的地方傳輸）。爲什麼要轉換成可見的字符呢？大家看到前面的內容應該都知道網絡協議基本上就是一些命令字符串了吧。那麼假如要傳輸的內容有 '\0' '\1' '\2' 這樣的內容顯然就不行了嘛。所以想個辦法把它們變成 'a' 'b' 'c' 就好了嘛。全部的字節值有 256 個，需要 8 個二進制位來表示（2 的 8 次方嘛），可見的字符沒有這麼多，全部一對一肯定不行。我們假設只有 64 個可見字符，怎樣去表示這 256 個實際字符呢？很簡單，每個字節是 8 位的，所以需要 256 個字符來表示才夠，那把它變成每個單元 6 位的（2 的 6 次方是 64）不就好了嗎。用二進制模擬一下大家會更清楚這個算法：
----------------
1|2|3|4|5|6|7|8|
----------------
=>
-------------
|1|2|3|4|5|6|
-------------

用計算機術語來講至關於將 8 位機換成了 6 位機。可是由於咱們如今都是 8 位機（固然實際是32位或者64位，8位數據只是兼容而已）。因此變通一下，用8位地址表示 6 位的數據就好了，這樣的話前兩位就固定爲 0 ，而數據向右移動一下就能夠了，以下圖：
----------------
1|2|3|4|5|6|7|8|
----------------
=>
----------------
0|0|1|2|3|4|5|6|
----------------

好了，這就是 base64 算法！這也是爲何全部的 base64 編碼算法都有移位運算符。

等等，你會說，那第 7、8 位上的數據豈不是丟失了。這個好辦啊，放到下一個字節上就好了唄。4 個保存 6 位數據的字節剛好可以保存住 3 個保存 8 位數據的字節嘛（因爲它們都能表示 24 位二進制數據，3x8=24 4x6=24 嘛）。這個"巧合"應該也是前人選擇用 64 個可見字符而不是其他數目的原因之一吧。用圖形表示那真是再明白不過了：

---------------- ---------------- ----------------
1|2|3|4|5|6|7|8| 1|2|3|4|5|6|7|8| 1|2|3|4|5|6|7|8|
---------------- ---------------- ----------------
=>
---------------- ---------------- ---------------- ----------------
0|0|1|2|3|4|5|6| 0|0|7|8|1|2|3|4| 0|0|5|6|7|8|1|2| 0|0|3|4|5|6|7|8|
---------------- ---------------- ---------------- ----------------

在 base64 中的 wiki 文檔中也有相似的圖形，實際上我上面的圖示就是受了 wiki 的啓發。如圖：
java

上面的文字表格可能排版後不太好看，我再傳張圖片吧：

移位操作用 java 來講解就太不合適了，所以我們只講解 C 語言的代碼，更直觀。代碼中唯一要注意的是除了第一個字節可以直接通過移位來得到外，其他三個字節的數據，都要從源數據的兩個字節中各取一部分"組合"起來（算法如此，實際上第四個字節恰好只來自於源數據的第三個字節的後半部分）。組合兩個字節的運算大家在大學應該都學過了，那就是二進制的 "或" 操作。所以這篇文章雖然我自認爲應該人人都能懂，不過也要讀者上過基本的計算機原理課程。所以大學的教育並不是很多程序員認爲的那樣不實用 -- 那些都是有用的基礎。在後面的文章中大家還會看到需要大學課程內容的部分。我知道有不少程序員是非科班的，那確實應該用業餘時間自己補充一下專業課程。要不確實有不少東西是不易理解的。比如沒學過彙編語言並實機操作的話恐怕是永遠也理解不了指針的。當然優秀的程序員與是否科班出身沒有什麼關係。

有了以上的知識，再來看看算法的核心，基本上就是不用講解了（固然了咱們仍是要再講解一下）。
核心的代碼非常的簡單，就像我們開始說的那樣，只要 4 句話：

        // 1st output value: the top 6 bits of input byte 0.
        out_block[0] = (temp[0] >> 2) & 0x3f;
        // 2nd: low 2 bits of byte 0 (moved up to bits 5-4) joined with the top 4 bits of byte 1.
        out_block[1] = ((temp[0] << 4) & 0x30) | ((temp[1] >> 4) & 0x0f);
        // 3rd: low 4 bits of byte 1 (moved up to bits 5-2) joined with the top 2 bits of byte 2.
        out_block[2] = ((temp[1] << 2) & 0x3c) | ((temp[2] >> 6) & 0x03);
        // 4th: the low 6 bits of byte 2, no shift needed.
        out_block[3] = (temp[2]) & 0x3f;

除了移位和 "或" 操做外，還用了 "與" 操做來去掉無關的數據位。與 0x3f 進行 "與" 操做是爲了將前兩位置成 0 。與其餘數字進行 "與" 操做的做用相似，只是要置 0 的位置不一樣罷了。
結合前面的圖示，這4句話的意思以下：
1.第一個結果字節直接來自原文第一個字節移位，再保證前兩位是空就好了；
2.第二個結果字節由原文第一個字節和第二個字節的各一部分組成;
3.第三個結果字節由原文第二個字節和第三個字節的各一部分組成;
4.第四個結果字節直接來自原文第三個字節，而且不用移位，再保證前兩位是空就好了.

另一份的 C++ 代碼，算法思想也是同樣的，只是處理方法略有差別而已，你們有興趣能夠本身分析。具體以下：

程序員

    // Same regrouping as the C version, but masking before shifting and
    // joining the two parts with '+' (the parts never share a bit).
    // 1st output value: the top 6 bits of input byte 0.
    char_array_4[0] = (char_array_3[0] & 0xfc) >> 2;
    // 2nd: low 2 bits of byte 0 followed by the top 4 bits of byte 1.
    char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4);
    // 3rd: low 4 bits of byte 1 followed by the top 2 bits of byte 2.
    char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6);
    // 4th: the low 6 bits of byte 2.
    char_array_4[3] = char_array_3[2] & 0x3f;

我我的以爲前面的 C 代碼更清晰直接，因此用 C 的代碼進行的講解。其餘補位、字符映射、變種什麼的實在不用說了，明白了前面的算法一通百通，你們能夠本身去看 wiki 或者其餘文章，wiki 的連接爲：
https://zh.wikipedia.org/wiki/Base64

有了這部分代碼，電子郵箱的登錄過程就易如反掌。具體可以到文末鏈接下載，核心代碼非常簡單：

    // Log in with base64-encoded credentials: send the AUTH LOGIN command first
    s = NewString("AUTH LOGIN\r\n", m);
    SendBuf(gSo, s->str, s->len);    
    
    rs = RecvLine(gSo, m, &buf); // receive a single line only
    printf("\r\nRecvLine:%s\r\n", rs->str);
    
    s = NewString("test1@newbt.net", m); // replace with your user name; note: for 163 mailboxes leave off the trailing @domain part
    s = base64_encode(s);
    LString_AppendConst(s,"\r\n");
    SendBuf(gSo, s->str, s->len);    
    
    rs = RecvLine(gSo, m, &buf); // receive a single line only
    printf("\r\nRecvLine:%s\r\n", rs->str);
    

    s = NewString("123456", m); // replace with your password
    s = base64_encode(s);
    LString_AppendConst(s,"\r\n");
    SendBuf(gSo, s->str, s->len);    
    
    rs = RecvLine(gSo, m, &buf); // receive a single line only
    printf("\r\nRecvLine:%s\r\n", rs->str);

爲了不你們誤覺得 lstring 很龐雜，因此我特地把 base64 的代碼放到一個獨立目錄，實際寫程序時最好仍是放到同一個目錄中比較方便.本章節的完整源碼能夠到如下地址下載（附帶了那份 C++ 的代碼，實際上並不須要編譯它）：
https://github.com/clqsrc/c_lib_lstring/tree/master/email_book/book_10

redis

(成功登錄的運行截圖)

--------------------------------------------------express

本系列文章已受權百家號 "clq的程序員學前班" . 文章編排上略有差別.