0%

Redis源码剖析之SDS

以下涉及到的源码均为redis5.0-rc3版本的代码【点击查看官方源码

SDS简介

简单动态字符串(simple dynamic string),是redis底层为字符串而设计的一种数据结构,是针对C字符串的一个改良版。凡是涉及到需要修改的字符串均采用sds来存储,而非C字符串。

SDS数据结构

如下是sds数据结构的定义(sds.h头文件中):

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
/* Note: sdshdr5 is never used, we just access the flags byte directly.
* However is here to document the layout of type 5 SDS strings. */
struct __attribute__ ((__packed__)) sdshdr5 {
unsigned char flags; /* 3 lsb of type, and 5 msb of string length */
char buf[];
};
struct __attribute__ ((__packed__)) sdshdr8 {
uint8_t len; /* used */
uint8_t alloc; /* excluding the header and null terminator */
unsigned char flags; /* 3 lsb of type, 5 unused bits */
char buf[];
};
struct __attribute__ ((__packed__)) sdshdr16 {
uint16_t len; /* used */
uint16_t alloc; /* excluding the header and null terminator */
unsigned char flags; /* 3 lsb of type, 5 unused bits */
char buf[];
};
struct __attribute__ ((__packed__)) sdshdr32 {
uint32_t len; /* used */
uint32_t alloc; /* excluding the header and null terminator */
unsigned char flags; /* 3 lsb of type, 5 unused bits */
char buf[];
};
struct __attribute__ ((__packed__)) sdshdr64 {
uint64_t len; /* used */
uint64_t alloc; /* excluding the header and null terminator */
unsigned char flags; /* 3 lsb of type, 5 unused bits */
char buf[];
};

从以上源码中可看见,redis针对C中类型长度的不同定义了5中结构:sdshdr5、sdshdr8、sdshdr16、sdshdr32、sdshdr64,其中sdshdr5已经不再使用了(由源码中注释得知-“sdshdr5 is never used, we just access the flags byte directly”)。下面给出结构示意图:
redis-1-1
其中:

  • len:已使用的空间;
  • alloc:除了空符号和结构头部之外所有的已分配空间,即真正为字符串分配的空间;
  • flags:结构类型
  • buf[]:存字符串的空间;

以sdshdr16结构的字符串“string”为例,如下所示:
redis-1-2
总共占用17(4+4+1+6+1+1)个字节,这里的flags为何为2将在后续进行说明。

SDS特点

  1. 能以O(1)的复杂度获取字符串的长度(len)以及剩余的未使用空间情况(alloc-len:此将对空间的分配和释放有重大影响)。
  2. 保留了C字符串的空字符‘\0’,这样就可以是的sds能无影响的使用部分的C字符串操作函数。
  3. 在空间分配方面,采用结构类(java的集合)常用的预分配策略。而sds的分配策略是当前空间小于1M时则变成当前空间的两倍,否则直接再分配1M。
  4. 在空间回收方面,sds不会立即释放,而是先保留,防止后续在空间不足的情况下再次分配带来的代价。
  5. 相比C字符串而言,sds是二进制安全的,因为sds识别整个字符串是通过len来判断,并不是同C而言是通过空字符结尾来判断。

SDS操作函数

这里只列出部分函数。
在列出操作函数之前,首先给出如下的宏定义:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
//定义结构类型
#define SDS_TYPE_5 0
#define SDS_TYPE_8 1
#define SDS_TYPE_16 2
#define SDS_TYPE_32 3
#define SDS_TYPE_64 4
//定义结构类型的掩码
#define SDS_TYPE_MASK 7
#define SDS_TYPE_BITS 3
//获取header指针
#define SDS_HDR_VAR(T,s) struct sdshdr##T *sh = (void*)((s)-(sizeof(struct sdshdr##T)));
#define SDS_HDR(T,s) ((struct sdshdr##T *)((s)-(sizeof(struct sdshdr##T))))
//sdshdr5类型的len
#define SDS_TYPE_5_LEN(f) ((f)>>SDS_TYPE_BITS)

接下来是6个静态内联函数。在看它们之前,需要先说明一下sds结构中的flags,前面说了它是代表结构的类型,那么它又是怎样来表明的呢?首先已知sds字符串s,那么根据内存分布情况就可以得到flags的值为s[-1], 在将其与宏定义的类型掩码进行与运算便得出类型值,如此便可以知道一个sds究竟使用的是哪儿种类型进行存储的了。

获取len的方法:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
static inline size_t sdslen(const sds s) {
unsigned char flags = s[-1];
switch(flags&SDS_TYPE_MASK) {
case SDS_TYPE_5:
return SDS_TYPE_5_LEN(flags);
case SDS_TYPE_8:
return SDS_HDR(8,s)->len;
case SDS_TYPE_16:
return SDS_HDR(16,s)->len;
case SDS_TYPE_32:
return SDS_HDR(32,s)->len;
case SDS_TYPE_64:
return SDS_HDR(64,s)->len;
}
return 0;
}

获取free空间(即未被使用的空间)的方法:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
static inline size_t sdsavail(const sds s) {
unsigned char flags = s[-1];
switch(flags&SDS_TYPE_MASK) {
case SDS_TYPE_5: {
return 0;
}
case SDS_TYPE_8: {
SDS_HDR_VAR(8,s);
return sh->alloc - sh->len;
}
case SDS_TYPE_16: {
SDS_HDR_VAR(16,s);
return sh->alloc - sh->len;
}
case SDS_TYPE_32: {
SDS_HDR_VAR(32,s);
return sh->alloc - sh->len;
}
case SDS_TYPE_64: {
SDS_HDR_VAR(64,s);
return sh->alloc - sh->len;
}
}
return 0;
}

对len的值进行重计算:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
//直接进行赋值设置
static inline void sdssetlen(sds s, size_t newlen) {
unsigned char flags = s[-1];
switch(flags&SDS_TYPE_MASK) {
case SDS_TYPE_5:
{
unsigned char *fp = ((unsigned char*)s)-1;
*fp = SDS_TYPE_5 | (newlen << SDS_TYPE_BITS);
}
break;
case SDS_TYPE_8:
SDS_HDR(8,s)->len = newlen;
break;
case SDS_TYPE_16:
SDS_HDR(16,s)->len = newlen;
break;
case SDS_TYPE_32:
SDS_HDR(32,s)->len = newlen;
break;
case SDS_TYPE_64:
SDS_HDR(64,s)->len = newlen;
break;
}
}

//相加改变len值
static inline void sdsinclen(sds s, size_t inc) {
unsigned char flags = s[-1];
switch(flags&SDS_TYPE_MASK) {
case SDS_TYPE_5:
{
unsigned char *fp = ((unsigned char*)s)-1;
unsigned char newlen = SDS_TYPE_5_LEN(flags)+inc;
*fp = SDS_TYPE_5 | (newlen << SDS_TYPE_BITS);
}
break;
case SDS_TYPE_8:
SDS_HDR(8,s)->len += inc;
break;
case SDS_TYPE_16:
SDS_HDR(16,s)->len += inc;
break;
case SDS_TYPE_32:
SDS_HDR(32,s)->len += inc;
break;
case SDS_TYPE_64:
SDS_HDR(64,s)->len += inc;
break;
}
}

获取sds字符串总共分配的空间:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/* sdsalloc() = sdsavail() + sdslen() */
static inline size_t sdsalloc(const sds s) {
unsigned char flags = s[-1];
switch(flags&SDS_TYPE_MASK) {
case SDS_TYPE_5:
return SDS_TYPE_5_LEN(flags);
case SDS_TYPE_8:
return SDS_HDR(8,s)->alloc;
case SDS_TYPE_16:
return SDS_HDR(16,s)->alloc;
case SDS_TYPE_32:
return SDS_HDR(32,s)->alloc;
case SDS_TYPE_64:
return SDS_HDR(64,s)->alloc;
}
return 0;
}

重新设置alloc值:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
static inline void sdssetalloc(sds s, size_t newlen) {
unsigned char flags = s[-1];
switch(flags&SDS_TYPE_MASK) {
case SDS_TYPE_5:
/* Nothing to do, this type has no total allocation info. */
break;
case SDS_TYPE_8:
SDS_HDR(8,s)->alloc = newlen;
break;
case SDS_TYPE_16:
SDS_HDR(16,s)->alloc = newlen;
break;
case SDS_TYPE_32:
SDS_HDR(32,s)->alloc = newlen;
break;
case SDS_TYPE_64:
SDS_HDR(64,s)->alloc = newlen;
break;
}
}

sds空间分配和释放相关的两个函数如下所示:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
//sds最大空间预分配值(在头文件中顶定义)
#define SDS_MAX_PREALLOC (1024*1024)


/* Enlarge the free space at the end of the sds string so that the caller
* is sure that after calling this function can overwrite up to addlen
* bytes after the end of the string, plus one more byte for nul term.
*
* Note: this does not change the *length* of the sds string as returned
* by sdslen(), but only the free buffer space we have. */
sds sdsMakeRoomFor(sds s, size_t addlen) {
void *sh, *newsh;
size_t avail = sdsavail(s);
size_t len, newlen;
char type, oldtype = s[-1] & SDS_TYPE_MASK;
int hdrlen;

/* Return ASAP if there is enough space left. */
if (avail >= addlen) return s;

//如果可用空间不足的情况下,进行空间增加
len = sdslen(s);
sh = (char*)s-sdsHdrSize(oldtype);
newlen = (len+addlen);
if (newlen < SDS_MAX_PREALLOC)
//小于1M直接变为原来的2倍
newlen *= 2;
else
//大于1M的情况,累加1M
newlen += SDS_MAX_PREALLOC;

type = sdsReqType(newlen);

/* Don't use type 5: the user is appending to the string and type 5 is
* not able to remember empty space, so sdsMakeRoomFor() must be called
* at every appending operation. */
if (type == SDS_TYPE_5) type = SDS_TYPE_8;

hdrlen = sdsHdrSize(type);
if (oldtype==type) {
newsh = s_realloc(sh, hdrlen+newlen+1);
if (newsh == NULL) return NULL;
s = (char*)newsh+hdrlen;
} else {
/* Since the header size changes, need to move the string forward,
* and can't use realloc */
newsh = s_malloc(hdrlen+newlen+1);
if (newsh == NULL) return NULL;
memcpy((char*)newsh+hdrlen, s, len+1);
s_free(sh);
s = (char*)newsh+hdrlen;
s[-1] = type;
sdssetlen(s, len);
}
sdssetalloc(s, newlen);
return s;
}


/* Reallocate the sds string so that it has no free space at the end. The
* contained string remains not altered, but next concatenation operations
* will require a reallocation.
*
* After the call, the passed sds string is no longer valid and all the
* references must be substituted with the new pointer returned by the call. */
sds sdsRemoveFreeSpace(sds s) {
void *sh, *newsh;
char type, oldtype = s[-1] & SDS_TYPE_MASK;
int hdrlen, oldhdrlen = sdsHdrSize(oldtype);
size_t len = sdslen(s);
sh = (char*)s-oldhdrlen;

/* Check what would be the minimum SDS header that is just good enough to
* fit this string. */
type = sdsReqType(len);
hdrlen = sdsHdrSize(type);

/* If the type is the same, or at least a large enough type is still
* required, we just realloc(), letting the allocator to do the copy
* only if really needed. Otherwise if the change is huge, we manually
* reallocate the string to use the different header type. */
if (oldtype==type || type > SDS_TYPE_8) {
newsh = s_realloc(sh, oldhdrlen+len+1);
if (newsh == NULL) return NULL;
s = (char*)newsh+oldhdrlen;
} else {
newsh = s_malloc(hdrlen+len+1);
if (newsh == NULL) return NULL;
memcpy((char*)newsh+hdrlen, s, len+1);
s_free(sh);
s = (char*)newsh+hdrlen;
s[-1] = type;
sdssetlen(s, len);
}
sdssetalloc(s, len);
return s;
}