// base64.cpp

#include <cstdint>
#include "base64.h"

using namespace std;

namespace Base64{

	//The 64 output symbols of standard base64, indexed by 6-bit value (65th element is the terminator).
	char alphabet[65]="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

	//Reverse look-up table of `alphabet` (for decoding), indexed by 7-bit ASCII code.
	//Every character that is not in `alphabet` -- including the padding character '=' -- maps to 127.
	std::uint8_t revbet[128]={
#define XX (127)
		XX,XX,XX,XX,XX,XX,XX,XX,XX,XX,XX,XX,XX,XX,XX,XX, //0-15
		XX,XX,XX,XX,XX,XX,XX,XX,XX,XX,XX,XX,XX,XX,XX,XX, //16-31
		XX,XX,XX,XX,XX,XX,XX,XX,XX,XX,XX,62,XX,XX,XX,63, //32-47
		52,53,54,55,56,57,58,59,60,61,XX,XX,XX,XX,XX,XX, //48-63
		XX, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14, //64-79
		15,16,17,18,19,20,21,22,23,24,25,XX,XX,XX,XX,XX, //80-95
		XX,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40, //96-111
		41,42,43,44,45,46,47,48,49,50,51,XX,XX,XX,XX,XX, //112-127
#undef XX
	};

	//Encodes arbitrary bytes as standard base64 text using `alphabet`.
	//Every 3 input bytes become 4 output characters. A trailing group of 1 or 2 bytes is written
	//as 2 or 3 characters followed by '=' padding, so the output length is always a multiple of 4.
	//No line breaks are inserted. Empty input yields an empty string.
	std::string encode(const std::string &data){
		typedef std::string::size_type size_type; //unsigned, so huge inputs cannot overflow like `int` would
		const size_type sz=data.size();
		if(sz==0)return {};
		const size_type blocks=sz/3; //number of complete 3-byte groups
		const size_type rest=sz%3;   //0, 1 or 2 leftover bytes
		std::string res(4*blocks+(rest!=0?4:0),'\0');

		//Reads one input byte as an unsigned value, so bytes >=0x80 are never sign-extended.
		const auto byteAt=[&data](size_type idx){
			return static_cast<std::uint32_t>(static_cast<unsigned char>(data[idx]));
		};

		for(size_type i=0;i<blocks;i++){
			//Pack 3 bytes into 24 bits, then peel off four 6-bit symbols from the low end.
			std::uint32_t x=(byteAt(3*i)<<16)|(byteAt(3*i+1)<<8)|byteAt(3*i+2);
			res[4*i+3]=alphabet[x&0x3f]; x>>=6;
			res[4*i+2]=alphabet[x&0x3f]; x>>=6;
			res[4*i+1]=alphabet[x&0x3f]; x>>=6;
			res[4*i+0]=alphabet[x];
		}

		const size_type in=3*blocks;  //index of the first leftover input byte
		const size_type out=4*blocks; //index of the final (padded) output group
		switch(rest){
			case 1: //8 bits -> 2 symbols (the second carries 2 bits + 4 zero bits) + "=="
				res[out+0]=alphabet[byteAt(in)>>2];
				res[out+1]=alphabet[(byteAt(in)&0x3)<<4];
				res[out+2]='=';
				res[out+3]='=';
				break;

			case 2: //16 bits -> 3 symbols (the third carries 4 bits + 2 zero bits) + "="
				res[out+0]=alphabet[byteAt(in)>>2];
				res[out+1]=alphabet[((byteAt(in)&0x3)<<4)|(byteAt(in+1)>>4)];
				res[out+2]=alphabet[(byteAt(in+1)&0xf)<<2];
				res[out+3]='=';
				break;

			default: //input length was a multiple of 3; nothing left to pad
				break;
		}
		return res;
	}

	//The inverse of `encode`.
	//Characters that are not in `alphabet` (newlines, '=' padding, bytes >=0x80, ...) are skipped,
	//so padding is optional and is not validated. The remaining characters are consumed as 6-bit
	//values, most significant bits first, and a byte is emitted as soon as 8 bits are available.
	//Hence n alphabet characters yield floor(6n/8) bytes; leftover bits of an incomplete final
	//group are dropped. Input without any alphabet character yields an empty string.
	std::string decode(const std::string &dataS){
		const std::uint8_t skip=127; //marker stored in `revbet` for non-alphabet characters
		std::string res;
		res.reserve(dataS.size()/4*3+2); //upper bound on the decoded length
		std::uint32_t acc=0; //bit accumulator; only its low `nbits` bits are pending output
		int nbits=0;
		for(char c : dataS){
			const unsigned char uc=static_cast<unsigned char>(c);
			if(uc>=128)continue; //outside the table; must not alias onto an ASCII character
			const std::uint8_t v=revbet[uc];
			if(v==skip)continue;
			acc=((acc<<6)|v)&0xffff; //at most 12 bits are ever pending, so 16 bits suffice
			nbits+=6;
			if(nbits>=8){
				nbits-=8;
				res.push_back(static_cast<char>((acc>>nbits)&0xff));
			}
		}
		return res;
	}

}