Golang访问二维数组和切片的性能探究

引文

前几天有个学弟来问我为什么用 Golang 语言写的一个算法题，用原生二维数组类型就不超时，而使用嵌套的 slice反而超时。

于是我利用 Golang的 benchmark测试以及 unsafe.pointer 来分别测试连续和随机访问性能以及分配的内存地址是否连续这两个因素。

地址分配

首先，二维数组的地址肯定是连续分配的，这个就不用再进行测试了，这里针对 Slice列数的两种情况分别进行测试。

test1: 每一个子Slice的维数均相同的情况

func main() {
	slice := [][]int{{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}
	fmt.Println(slice)
	fmt.Printf("%x\n", unsafe.Pointer(&slice[0][0]))
	fmt.Printf("%x\n", unsafe.Pointer(&slice[0][1]))
	fmt.Printf("%x\n", unsafe.Pointer(&slice[0][2]))
	fmt.Printf("%x\n", unsafe.Pointer(&slice[1][0]))
	fmt.Printf("%x\n", unsafe.Pointer(&slice[1][1]))
	fmt.Printf("%x\n", unsafe.Pointer(&slice[1][2]))
	fmt.Printf("%x\n", unsafe.Pointer(&slice[2][0]))
	fmt.Printf("%x\n", unsafe.Pointer(&slice[2][1]))
	fmt.Printf("%x\n", unsafe.Pointer(&slice[2][2]))
}

// output:
c0000ae090
c0000ae098
c0000ae0a0
c0000ae0a8
c0000ae0b0
c0000ae0b8
c0000ae0c0
c0000ae0c8
c0000ae0d0

可以看到，我的机器是64位的，所以每一个 int类型占8位，所有的地址均是连续的。

test2: 存在子Slice的维数不相同的情况

func main() {
	slice := [][]int{{1, 2, 3}, {4, 5, 6, 7}, {7, 8, 9}}
	fmt.Println(slice)
	fmt.Printf("%x\n", unsafe.Pointer(&slice[0][0]))
	fmt.Printf("%x\n", unsafe.Pointer(&slice[0][1]))
	fmt.Printf("%x\n", unsafe.Pointer(&slice[0][2]))
	fmt.Printf("%x\n", unsafe.Pointer(&slice[1][0]))
	fmt.Printf("%x\n", unsafe.Pointer(&slice[1][1]))
	fmt.Printf("%x\n", unsafe.Pointer(&slice[1][2]))
	fmt.Printf("%x\n", unsafe.Pointer(&slice[1][3]))
	fmt.Printf("%x\n", unsafe.Pointer(&slice[2][0]))
	fmt.Printf("%x\n", unsafe.Pointer(&slice[2][1]))
	fmt.Printf("%x\n", unsafe.Pointer(&slice[2][2]))
}
// output:
c000012150
c000012158
c000012160
c00000e200
c00000e208
c00000e210
c00000e218
c000012168
c000012170
c000012178

可以看到，我们使得第二个子 Slice的长度为 4，则此时所有子 Slice内部的地址是连续的，而不同的 Slice直接地址并非再连续。

Benchmark测试

下面我们对数组和切片的顺序访问和随机访问性能做一个测试：

test1: 顺序访问

func BenchmarkSlice(b *testing.B) {
	row, col := 10000, 10000
	var slice [][]int
	for i := 0; i < row; i++ {
		tmp := make([]int, col)
		slice = append(slice, tmp)
	}
	b.ResetTimer()
	for t := 0; t < b.N; t++ {
		for i := 0; i < row; i++ {
			for j := 0; j < col;  j++ {
				_ = slice[i][j]
			}
		}
	}

	b.StopTimer()
}

func BenchmarkArray(b *testing.B) {
	var arr [10000][10000]int
	row, col := 10000, 10000
	b.ResetTimer()
	for t := 0; t < b.N; t++ {
		for i := 0; i < row; i++ {
			for j := 0; j < col;  j++ {
				_ = arr[i][j]
			}
		}
	}
	b.StopTimer()
}

// output:
goos: windows
goarch: amd64
pkg: playground
cpu: AMD Ryzen 7 5800H with Radeon Graphics
BenchmarkSlice-16             32          36270119 ns/op
BenchmarkArray-16             46          24420687 ns/op
PASS
ok      playground      2.821s

我们可以发现，数组的连续读写比切片快不少。

test2: 随机访问读写

func BenchmarkSlice(b *testing.B) {
	row, col := 10000, 10000
	var slice [][]int
	rand.Seed(23333)
	for i := 0; i < row; i++ {
		var tmp []int
		if i % 2 == 0{
			tmp = make([]int, col - 1)
		} else {
			tmp = make([]int, col)
		}
		slice = append(slice, tmp)
	}
	b.ResetTimer()
	for t := 0; t < b.N; t++ {
		for i := 0; i < 1000000; i++ {
			x := rand.Intn(row - 1)
			y := rand.Intn(col - 1)
			_ = slice[x][y]
		}
	}

	b.StopTimer()
}

func BenchmarkArray(b *testing.B) {
	var arr [10000][10000]int
	row, col := 10000, 10000
	rand.Seed(23333)
	b.ResetTimer()
	for t := 0; t < b.N; t++ {
		for i := 0; i < 1000000; i++ {
			x := rand.Intn(row - 1)
			y := rand.Intn(col - 1)
			_ = arr[x][y]
		}
	}
	b.StopTimer()
}

// output:
BenchmarkSlice-16             81          15271412 ns/op
BenchmarkArray-16             76          14989986 ns/op