记一次go base64编码排查

前言

开发中偶然发现，结构体中声明了[]byte类型的字段，json marshal后，[]byte类型的字段会被base64编码，下面通过一个例子来找找base64编码的原因

排查过程

例子

package main

import (
	"encoding/json"
	"testing"
)

// Reply is the sample payload used to show how encoding/json marshals a
// struct that contains a []byte field.
type Reply struct {
	// Name is an ordinary string field and is emitted as a JSON string.
	Name    string
	// Content is a byte slice; encoding/json emits a []byte as a
	// base64-encoded JSON string rather than as an array of numbers.
	Content []byte
}

// TestJSONMarshal marshals a Reply and logs the resulting JSON so that the
// base64 encoding of the []byte field Content can be observed in the output.
func TestJSONMarshal(t *testing.T) {
	r := Reply{
		Name:    "lam",
		Content: []byte("pongpong"),
	}
	res, err := json.Marshal(&r)
	if err != nil {
		// Fail loudly instead of silently logging an empty result.
		t.Fatalf("json.Marshal(%+v): %v", r, err)
	}
	t.Log(string(res))
}

输出

=== RUN   TestJSONMarshal
    client_test.go:26: {"Name":"lam","Content":"cG9uZ3Bvbmc="}
--- PASS: TestJSONMarshal (0.00s)
PASS

跟着debug进入encoding/json 包内部，发现注释部分已经说明[]byte类型会被base64编码：

// ...
// Array and slice values encode as JSON arrays, except that
// []byte encodes as a base64-encoded string, and a nil slice
// encodes as the null JSON value.
// ...

继续根据方法调用链找到具体的编码位置：

// marshal encodes v into e by handing its reflect.Value to e.reflectValue.
//
// Encoding failures are signalled by panicking with a jsonError. The deferred
// function recovers such a panic and returns the wrapped error through the
// named result err. Any other panic value is re-raised unchanged.
func (e *encodeState) marshal(v any, opts encOpts) (err error) {
	defer func() {
		if r := recover(); r != nil {
			// Only jsonError panics are converted into an ordinary error.
			if je, ok := r.(jsonError); ok {
				err = je.error
			} else {
				// Not raised by the encoder itself: propagate it.
				panic(r)
			}
		}
	}()
	e.reflectValue(reflect.ValueOf(v), opts)
	return nil
}

// reflectValue looks up the encoder function for v with valueEncoder and
// immediately invokes it with the encode state, the value and the options.
func (e *encodeState) reflectValue(v reflect.Value, opts encOpts) {
	valueEncoder(v)(e, v, opts)
}

// valueEncoder returns the encoder function to use for v. An invalid (zero)
// reflect.Value gets invalidValueEncoder; every other value is dispatched on
// its type through typeEncoder.
func valueEncoder(v reflect.Value) encoderFunc {
	if !v.IsValid() {
		return invalidValueEncoder
	}
	return typeEncoder(v.Type())
}

// typeEncoder returns the encoder function for type t.
//
// The first part of the body is elided in this excerpt; f and wg are declared
// there. The visible tail builds the real encoder with newTypeEncoder, calls
// wg.Done, stores the encoder in encoderCache under t and returns it.
func typeEncoder(t reflect.Type) encoderFunc {
        // ...
	// Compute the real encoder and replace the indirect func with it.
	f = newTypeEncoder(t, true)
	wg.Done()
	encoderCache.Store(t, f)
	return f
}

// newTypeEncoder constructs the encoder function for type t by switching on
// t.Kind().
//
// Most of the body is elided in this excerpt. Of the visible cases, a
// reflect.Slice kind is delegated to newSliceEncoder, and a kind matched by no
// case gets unsupportedTypeEncoder. allowAddr is not used in the visible part.
func newTypeEncoder(t reflect.Type, allowAddr bool) encoderFunc {
        // ...
	switch t.Kind() {
        // ...
	case reflect.Slice:
		// []byte is a slice kind, so it is routed through newSliceEncoder.
		return newSliceEncoder(t)

        // ...
	default:
		return unsupportedTypeEncoder
	}
}

// newSliceEncoder returns the encoder function for the slice type t.
//
// If the element kind is reflect.Uint8 and the pointer-to-element type
// implements neither marshalerType nor textMarshalerType, the slice is treated
// as raw bytes and encodeByteSlice (base64 output) is returned. Otherwise the
// slice is encoded through a sliceEncoder wrapping newArrayEncoder(t).
func newSliceEncoder(t reflect.Type) encoderFunc {
	// Byte slices get special treatment; arrays don't.
	if t.Elem().Kind() == reflect.Uint8 {
		p := reflect.PointerTo(t.Elem())
		// Element types with their own marshaling methods skip the base64
		// path and fall through to the generic slice encoder below.
		if !p.Implements(marshalerType) && !p.Implements(textMarshalerType) {
			return encodeByteSlice
		}
	}
	enc := sliceEncoder{newArrayEncoder(t)}
	return enc.encode
}

encodeByteSlice 会根据 base64 编码后的长度选择开销更小的编码方式：能放进 e.scratch 时直接复用该缓冲区；不超过 1024 字节时一次性分配缓冲区再编码；更长时则使用流式编码器直接写入输出。

// encodeByteSlice writes the byte slice held in v to e as a JSON value.
//
// A nil slice is written as the literal null. Any other slice is written as a
// double-quoted string containing its standard base64 encoding. The encoding
// strategy is picked from the encoded length:
//   - fits in e.scratch: encode into the scratch buffer, no allocation;
//   - at most 1024 bytes: allocate a buffer of exactly that size and encode;
//   - larger: stream through a base64 encoder that writes directly to e.
//
// The encOpts argument is ignored.
func encodeByteSlice(e *encodeState, v reflect.Value, _ encOpts) {
	if v.IsNil() {
		e.WriteString("null")
		return
	}
	s := v.Bytes()
	// Opening quote of the JSON string.
	e.WriteByte('"')
	encodedLen := base64.StdEncoding.EncodedLen(len(s))
	if encodedLen <= len(e.scratch) {
		// If the encoded bytes fit in e.scratch, avoid an extra
		// allocation and use the cheaper Encoding.Encode.
		dst := e.scratch[:encodedLen]
		base64.StdEncoding.Encode(dst, s)
		e.Write(dst)
	} else if encodedLen <= 1024 {
		// The encoded bytes are short enough to allocate for, and
		// Encoding.Encode is still cheaper.
		dst := make([]byte, encodedLen)
		base64.StdEncoding.Encode(dst, s)
		e.Write(dst)
	} else {
		// The encoded bytes are too long to cheaply allocate, and
		// Encoding.Encode is no longer noticeably cheaper.
		enc := base64.NewEncoder(base64.StdEncoding, e)
		enc.Write(s)
		// Close flushes any partially written final block to e.
		enc.Close()
	}
	// Closing quote of the JSON string.
	e.WriteByte('"')
}

总结

至此，[]byte 被 base64 编码的代码已经找到了。思考了一下，Go 之所以选择对 []byte 进行 base64 编码，是因为 JSON 字符串必须是合法的 Unicode 文本，而 []byte 可能包含任意二进制数据；如果直接把它当作 UTF-8 字符串输出，就可能出现非预期的字符，导致“乱码”问题。类似地，在 AES 加密等场景下，也会把加密后的二进制数据用 base64 或其他编码方式转换为字符串进行传输和存储，待使用时先进行 base64 解码，再操作原始的 []byte 数据。