首页 > 技术文章 > 实现base64的编码解码,深刻理解base64

janbar 2020-01-10 18:27 原文

上代码

#include<stdio.h>
#include<string.h>
#include<stdlib.h>

/* Filler character the encoder writes to complete the final 4-character group. */
const char padding   = '=';
/* Encoding alphabet: the character at index v represents the 6-bit value v. */
const char base64e[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
/*
 * Decoding table indexed by character code: each entry is the 6-bit value of
 * that character in base64e, 0xff for characters that are not in the
 * alphabet, and 0x00 for the padding character.
 * The table stops at code 0x7a ('z'), so it has 123 entries; larger character
 * codes are outside the array.
 */
const char base64d[] = {
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 0x00-0x0f */
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 0x10-0x1f */
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x3e,0xff,0xff,0xff,0x3f, /* 0x20-0x2f: '+' and '/' */
0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0xff,0xff,0xff,0x00,0xff,0xff, /* 0x30-0x3f: '0'-'9', '=' */
0xff,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e, /* 0x40-0x4f: 'A'-'O' */
0x0f,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0xff,0xff,0xff,0xff,0xff, /* 0x50-0x5f: 'P'-'Z' */
0xff,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28, /* 0x60-0x6f: 'a'-'o' */
0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,0x30,0x31,0x32,0x33};                         /* 0x70-0x7a: 'p'-'z' */

/*
 * Derive the decoding table from the encoding alphabet `base64e` and the
 * `padding` character, and print it to stdout as a C array definition.
 *
 * The alphabet is validated first; on any violation a message is printed and
 * the process exits with status 1:
 *   - it must be exactly 64 characters long,
 *   - it must not contain '\n' or '\r',
 *   - it must not contain the padding character,
 *   - it must not contain the same character twice.
 *
 * The printed table is indexed by character code and is only as long as the
 * largest character code among the alphabet and the padding character.
 * Entries are the 6-bit value of the character, 0xff for characters outside
 * the alphabet, and 0 for the padding character.
 */
void get_base64d(void) {
  if (strlen(base64e) != 64) {
    printf("encoding alphabet is not 64-bytes long\n");
    exit(1); // the alphabet must have exactly 64 characters
  }
  int i, j;
  for (i = 0; i < 64; i++) {
    if (base64e[i] == '\n' || base64e[i] == '\r') {
      printf("encoding alphabet contains newline character\n");
      exit(1); // line breaks are not allowed in the alphabet
    }
    if (base64e[i] == padding) {
      printf("there are duplicate characters :%c\n", base64e[i]);
      exit(1); // the padding character must not be part of the alphabet
    }
    for (j = i + 1; j < 64; j++) {
      if (base64e[i] == base64e[j]) {
        printf("there are duplicate characters :%c\n", base64e[i]);
        exit(1); // every alphabet character must be unique
      }
    }
  }

  // One slot per possible byte value; 0xff marks "not in the alphabet".
  unsigned char tmp[256];
  memset(tmp, 0xff, sizeof tmp);

  // Index through unsigned char so bytes >= 0x80 cannot become negative
  // subscripts on platforms where plain char is signed.
  int max_code = -1;
  for (i = 0; i < 64; i++) {
    unsigned char code = (unsigned char)base64e[i];
    tmp[code] = (unsigned char)i;
    if (code > max_code) {
      max_code = code;
    }
  }
  unsigned char pad_code = (unsigned char)padding;
  tmp[pad_code] = 0;
  if (pad_code > max_code) {
    max_code = pad_code; // the padding character must be covered as well
  }

  // Print only up to the largest code in use, 16 entries per line. The
  // separator is omitted after the last entry instead of being "erased" with
  // a backspace, so the output is also valid when redirected to a file.
  printf("const char base64d[] = {");
  for (i = 0; i <= max_code; i++) {
    if (i % 16 == 0) {
      printf("\n");
    }
    printf("0x%02x%s", (unsigned)tmp[i], i < max_code ? "," : "");
  }
  printf("};\n");
}

// Encode the NUL-terminated string `src` as base64 into `dest`.
//
// `max_len` is the capacity of `dest` in bytes, including the terminating
// '\0'. Only complete 4-character output groups are written; when `dest` is
// too small the output is truncated at a group boundary, so the caller must
// pass a large enough buffer (4 * ((strlen(src) + 2) / 3) + 1 bytes) to get
// the full encoding. Nothing is written when `max_len` is not positive.
void encode_base_64(const char *src, char *dest, int max_len) {
  if (max_len <= 0) {
    return; // no room even for the terminator
  }

  // Read the input as unsigned bytes: with a signed plain char, bytes >= 0x80
  // would sign-extend and corrupt the neighbouring sextets.
  const unsigned char *in = (const unsigned char *)src;
  size_t left = strlen(src);
  // Number of 4-character groups that fit while leaving room for '\0'.
  size_t groups = (size_t)(max_len - 1) / 4;
  unsigned long n;

  for (; groups > 0 && left >= 3; groups--, in += 3, left -= 3) {
    n = (unsigned long)in[0] << 16 | (unsigned long)in[1] << 8 | in[2];
    *dest++ = base64e[(n >> 18) & 0x3f];
    *dest++ = base64e[(n >> 12) & 0x3f];
    *dest++ = base64e[(n >>  6) & 0x3f];
    *dest++ = base64e[n & 0x3f];
  }

  // The final partial group needs four more characters; emit it only if the
  // group budget has not already been used up.
  if (groups > 0 && left == 1) {
    n = (unsigned long)in[0] << 16;
    *dest++ = base64e[(n >> 18) & 0x3f];
    *dest++ = base64e[(n >> 12) & 0x3f];
    *dest++ = padding;
    *dest++ = padding;
  } else if (groups > 0 && left == 2) {
    n = (unsigned long)in[0] << 16 | (unsigned long)in[1] << 8;
    *dest++ = base64e[(n >> 18) & 0x3f];
    *dest++ = base64e[(n >> 12) & 0x3f];
    *dest++ = base64e[(n >>  6) & 0x3f];
    *dest++ = padding;
  }
  *dest = 0;
}

// max_len为dest最大长度,可能解码不全,需要保证max_len够长
// 如果真的不够用那只能存max_len-1个字符,最后一位为'\0'结束符
void decode_base_64(const char *src,char *dest, int max_len) {
  size_t l = strlen(src);
  int n, i;
  for (i=0; i<l; i+=4) {
    n = base64d[src[i]]<<18  | base64d[src[i+1]]<<12 | 
        base64d[src[i+2]]<<6 | base64d[src[i+3]];

    if (--max_len <= 0) break;
    *dest++ = (n>>16)&0xff;
    if (--max_len <= 0) break;
    *dest++ = (n>>8)&0xff;
    if (--max_len <= 0) break;
    *dest++ = n&0xff;
  }
  *dest = 0;
}

// Command-line driver.
//   <prog> get      print the decoding table generated from the alphabet
//   <prog> STRING   print STRING, its base64 encoding, and the result of
//                   decoding that encoding again
// Returns 0 on success or wrong usage, 1 when memory allocation fails.
int main(int argc,char *argv[])
{
  if (argc != 2) {
    printf("usage :%s [get|string]\n", argv[0]);
    return 0;
  }
  if (strcmp(argv[1],"get") == 0) {
    get_base64d();
    return 0;
  }

  // Every started group of 3 input bytes becomes 4 output characters, plus
  // one byte for the terminator. This is always at least strlen + 1, so the
  // same capacity is also enough for the input copy and the decoded text.
  size_t cap = 4 * ((strlen(argv[1]) + 2) / 3) + 1;
  int len = (int)cap;

  char *s1 = malloc(cap);
  if (NULL == s1) {
    printf("malloc error\n");
    return 1;
  }
  char *s2 = malloc(cap);
  if (NULL == s2) {
    printf("malloc error\n");
    free(s1); // do not leak the first buffer
    return 1;
  }
  strcpy(s1, argv[1]);
  printf("baseStr:\"%s\"\n", s1);

  encode_base_64(s1,s2,len);
  printf("base64e:\"%s\"\n", s2);

  decode_base_64(s2,s1,len);
  printf("base64d:\"%s\"\n", s1);

  free(s2);
  free(s1);
  return 0;
}

执行 `.\a.exe get` 会调用 get_base64d,根据 base64e 和 padding 生成解码用的查找表 base64d;解码时直接查表,速度更快。下面是一次编码、解码的运行结果:

.\a.exe asdzxcaqwe
baseStr:"asdzxcaqwe"
base64e:"YXNkenhjYXF3ZQ=="
base64d:"asdzxcaqwe"

推荐阅读