In the Redis system, a more complete encapsulation of strings is established, a dynamic string is established, and a large number of practical APIs are built. The relevant implementation codes are sds.h and sds.c. The following are my source code reading notes. There is a lot of content, and it will be updated gradually
/* An sds is a plain char pointer to the string bytes. Its header (one of the
 * sdshdr* structs below) is stored immediately before those bytes, so the
 * flags byte is always reachable as s[-1]. */
typedef char *sds;

/* All headers are packed: no padding is inserted between members, which is
 * what makes the "flags is the byte right before buf" layout hold. */

/* Type 5 header: only a flags byte. The low SDS_TYPE_BITS bits carry the
 * type and the remaining high bits carry the string length
 * (see SDS_TYPE_5_LEN). It has no len/alloc members. */
struct __attribute__ ((__packed__)) sdshdr5 {
    unsigned char flags;
    char buf[];
};

/* Types 8/16/32/64 share the same shape and differ only in the width of
 * the len and alloc members. */
struct __attribute__ ((__packed__)) sdshdr8 {
    uint8_t len;         /* length of the string currently stored */
    uint8_t alloc;       /* allocated size for the string data */
    unsigned char flags; /* low 3 bits: SDS_TYPE_* tag */
    char buf[];
};

struct __attribute__ ((__packed__)) sdshdr16 {
    uint16_t len;
    uint16_t alloc;
    unsigned char flags;
    char buf[];
};

struct __attribute__ ((__packed__)) sdshdr32 {
    uint32_t len;
    uint32_t alloc;
    unsigned char flags;
    char buf[];
};

struct __attribute__ ((__packed__)) sdshdr64 {
    uint64_t len;
    uint64_t alloc;
    unsigned char flags;
    char buf[];
};

/* Type tags stored in the low bits of the flags byte. */
#define SDS_TYPE_5  0
#define SDS_TYPE_8  1
#define SDS_TYPE_16 2
#define SDS_TYPE_32 3
#define SDS_TYPE_64 4

/* Mask and bit count used to separate the type tag from the rest of flags. */
#define SDS_TYPE_MASK 7
#define SDS_TYPE_BITS 3

/* Declare a local variable `sh` pointing at the header of type T that
 * precedes string s. The expansion already ends with ';'. */
#define SDS_HDR_VAR(T,s) struct sdshdr##T *sh = (void*)((s)-(sizeof(struct sdshdr##T)));

/* Expression form: pointer to the header of type T that precedes string s. */
#define SDS_HDR(T,s) ((struct sdshdr##T *)((s)-(sizeof(struct sdshdr##T))))

/* Extract the length of a type 5 string from its flags byte. */
#define SDS_TYPE_5_LEN(f) ((f)>>SDS_TYPE_BITS)
The above are the struct declarations and #define macros for the dynamic string. There are 5 header types, each suited to strings of a different length range. I refer to these structs as the head (header) of the dynamic string.
sdshdr5: A string with a length less than 32
sdshdr8: A string with a length less than 256
sdshdr16: A string whose length is less than 2^16
sdshdr32: A string whose length is less than 2^32. One thing to note here is that sdshdr64 is only chosen for longer strings when the machine's LONG_MAX is equal to LLONG_MAX; if LONG_MAX is not equal to LLONG_MAX, the sdshdr32 type is returned instead.
sdshdr64: This class applies to all other lengths.
sdshdr5 This type is different from other types. It lacks len members and alloc members, and its judgment and processing are quite special. However, there is an official comment in the code, as follows:
/* Note: sdshdr5 is never used, we just access the flags byte directly.
* However is here to document the layout of type 5 SDS strings. */
The comments indicate that this type has never been used, so we will not consider it here. In fact, its processing operations are essentially no different from other types. For convenience, we conduct research in general types.
There are four members in the structure, namely len, alloc, flags and buf.
len: The length of the string.
alloc: The total size of string memory. Note that alloc is different from len. len is the length of the actual string, and alloc is the actual allocated memory size (excluding the size of the sds header and trailing '\0'). In order to reduce repeated re-application of memory when the string content increases, redis will apply for more memory for use. When the string size is less than 1MB, apply for twice the memory size. When the string size is greater than or equal to 1MB, apply for 1MB more memory for use. For detailed settings, please see the analysis of the sdsMakeRoomFor function later.
flags: Used as a mark to distinguish different types. Currently, only the lower 3 bits are used for the type mark. The upper 5 bits are not used yet (except by SDS_TYPE_5, which stores its length there) and can be used to add new features in the future. The SDS_TYPE_* constants declared by #define in the above code are the corresponding tag values, which are used to distinguish different string types. For example, when flags is equal to SDS_TYPE_8, the header is 3 bytes long (1-byte len, 1-byte alloc, 1-byte flags), so stepping back 3 bytes from the beginning of the string reaches the head, and the first byte (8 bits) of the head is the actual length of the current string. When equal to SDS_TYPE_16, the header is 5 bytes long, and the first 2 bytes (16 bits) of the head hold the actual length of the current string. The combined use of flags and header information will appear in large numbers in subsequent function analysis.
buf:实际存储字符串内容的数组,同传统数组一样,结尾需要'\0'字符。
在sdshdr的声明当中,我们可以看到 __attribute__ ((__packed__)) 关键字,这将使这个结构体在内存中不再遵守字节对齐规则,而是以内存紧凑的方式排列。所以从字符串位置开始向前一个字节便可以获取flags信息了,比如buf[-1]。具体__attribute__ ((__packed__))与字节对齐的内容请查看另一篇博客。
SDS_HDR_VAR宏则通过结构体类型与字符串开始字节,获取到动态字符串头部的开始位置,并赋值给sh指针。SDS_HDR宏则通过类型与字符串开始字节,返回动态字符串头部的指针。使用方式可在之后的代码当中看到,具体define声明中的双'#'号的使用方式与意义,请查看另一篇博客。
sds比起传统的字符串,有着自己优势跟便利之处。
1、内存预分配:sds通过预先分配了更多的内存量,来避免频繁的申请、分配内存,无端的浪费了性能
2、惰性释放:sds作为普通字符串使用之时,可以通过在不同字节打上'\0'字符,来代表字符串的截断及减少长度,而不是需要清空多余的字节并释放它们,那些内存在之后的操作当中可以当做空闲内存继续使用。
3、二进制安全:作为非字符串使用存储数据之时,通过善用头部的len属性,可以存储一些包含'\0'字符的数据。当然,一定要善用len属性,api当中,如长度更新的函数,同样通过'\0'字符来判断结尾!
接下来开始介绍sds相关的api函数,第一批是声明、定义在sds.h文件内的静态函数,这些函数都是针对动态字符串头部的属性的获取与修改,简单易懂
//获取动态字符串长度 static inline size_t sdslen(const sds s) { unsigned char flags = s[-1];//获取头部信息中的flags属性,因内存紧密相连,可以直接通过这种方式获取 switch(flags&SDS_TASK_MASK) {//获取类型,SDS_TASK_MASK为7,所以flags&SDS_TASK_MASK等于flags case SDS_TYPE_5: return SDS_TYPE_5_LEN(flags);//SDS_TYPE_5类型的长度获取稍微不同,它的长度被定义在flags的高5位当中,具体可查看之后的sdsnewlen函数,或者下面的sdssetlen函数 case SDS_TYPE_8: return SDS_HDR(8,s)->len;//SDS_HDR函数通过类型与字符串开始字节获取头部,以此获取字符串的长度 case SDS_TYPE_16: return SDS_HDR(16,s)->len; case SDS_TYPE_32: return SDS_HDR(32,s)->len; case SDS_TYPE_64: RETURN SDS_HDR(64,S)->len; } return 0; } //获取动态字符串的剩余内存 static inline size_t sdsavail(const sds s) { unsigned char flags = s[-1];//获取flags switch(flags&SDS_TYPE_MASK) { case SDS_TYPE_5://SDS_TYPE_5直接返回0, return 0; case SDS_TYPE_8: { SDS_HDR_VAR(8,s);//通过SDS_HDR_VAR函数,将头部指针放置在sh变量 return sh->alloc - sh->len;//总内存大小 - 字符串长度,获取可用内存大小 } case SDS_TYPE_16: { SDS_HDR_VAR(16,s); return sh->alloc - sh->len; } case SDS_TYPE_32: { SDS_HDR_VAR(32,s); return sh->alloc - sh->len; } case SDS_TYPE_64: { SDS_HDR_VAR(64,s); return sh->alloc - sh-len; } } return 0; } //重置字符串长度 static inline void sdssetlen(sds s, size_t newlen) { unsigned char flags = s[-1];//获取flags switch(flags&SDS_TASK_MASK) { //SDS_TYPE_5的长度设置较为特殊,长度信息写在flags的高5位 case SDS_TYPE_5: { unsigned char *fp = ((unsigned char*)s)-1; *fp = SDS_TYPE_5 | (newlen << SDS_TYPE_BITS); } break; //其他类型则是统一修改len属性的值 case SDS_TYPE_8: SDS_HDR(8,s)->len = newlen; break; case SDS_TYPE_16: SDS_HDR(16,s)->len = newlen; break; case SDS_TYPE_32: SDS_HDR(32,s)->len = newlen; break; case SDS_TYPE_64: SDS_HDR(64,s)->len = newlen; break; } } //按照指定数值,增加字符串长度 static inline void sdsinclen(sds s, size_t inc) { unsigned char flags = s[-1];//获取flags switch(flags&SDS_TYPE_MASK) { //SDS_TYPE_5类型使用上面的函数,获取长度、更新、设置 case SDS_TYPE_5: { unsigned char *fp = ((unsigned char*)s)-1; unsigned char newlen = SDS_TYPE_LEN(flags)+inc; *fp = SDS_TYPE_5 | (newlen << SDS_TYPE_BITS); } break; //其他类型则直接通过SDS_HDR函数,更新len值 case SDS_TYPE_8: SDS_HDR(8,s)->len += 
inc; break; case SDS_TYPE_16: SDS_HDR(16,s)->len += inc; break; case SDS_TYPE_32: SDS_HDR(32,s)->len += inc; break; case SDS_TYPE_64: SDS_HDR(64,s)->len += inc; break; } } //获取动态字符串的总内存 static inline size_t sdsalloc(const sds s) { unsigned char flags = s[-1];//获取flags switch(flags&SDS_TASK_MASK) { //SDS_TYPE_5直接通过SDS_TYPE_5_LEN函数返回 case SDS_TYPE_5: return SDS_TYPE_5_LEN(flags); //其他类型则返回头部信息中的alloc属性 case SDS_TYPE_8: return SDS_HDR(8,s)->alloc; case SDS_TYPE_16: return SDS_HDR(16,s)->alloc; case SDS_TYPE_32: return SDS_HDR(32,s)->alloc; case SDS_TYPE_64: return SDS_HDR(64,s)->alloc; } return 0; } //重置字符串内存大小 static inline size_t sdssetalloc(sds s, size_t newlen) { unsigned cahr flags = s[-1];//获取flags switch(flags&SDS_TASK_MASK) { case SDS_TYPE_5: //官方注释,SDS_TYPE_5不做任何操作 /*Nothing to do, this type has no total allocation info. */ break; //其他类型直接修改头部信息中的alloc属性 case SDS_TYPE_8: SDS_HDR(8,s)->alloc = newlen; break; case SDS_TYPE_16: SDS_HDR(16,s)->alloc = newlen; break; case SDS_TYPE_32: SDS_HDR(32,s)->alloc = newlen; break; case SDS_TYPE_64: SDS_HDR(64,s)->alloc = newlen; break; } }
上述的几个函数,sdslen,sdsavail,sdssetlen,sdsinclen,sdsalloc,sdssetalloc函数,都是基本的头部属性操作函数。代码的难度也不大,可以直观的阅读、理解。
接下来的sds的相关api,数量有点多,之后的dict、zskiplist也是有大量api,挑部分代码较多,需要逐行理解的函数来记录、分析。
//创建一个新的sds对象 sds sdsnewlen(const void *init, size_t initlen) { void *sh; sds s; char type = sdsReqType(initlen);//根据初始化长度获取对应结构体类型 if (type == SDS_TYPE_5 && initlen == 0) type = SDS_TYPE_8;//若长度为0的则初始化为SDS_TYPE_8类型 int hdrlen = sdsHdrSize(type); unsigned char *fp; sh = s_malloc(hdrlen+initlen+1);//申请足够的内存,头部大小+初始化大小+结尾符 if (!init) memset(sh, 0, hdrlen+initlen+1); if (sh == NULL) return NULL; s = (char*)sh+hdrlen;//获取字符串起始字节 fp = ((unsigned char*)s)-1;//获取flags字节 switch(type) { //SDS_TYPE_5又是特立独行了,有自己的初始化方案,我都懒得说明了。。。 case SDS_TYPE_5: { *fp = type | (initlen << SDS_TYPE_BITS); break; } //其他类型通过SDS_HDR_VAR函数获取头部信息,并逐步初始化 case SDS_TYPE_8: { SDS_HDR_VAR(8,s); sh->len = initlen; sh->alloc = initlen; *fp = type; break; } case SDS_TYPE_16: { SDS_HDR_VAR(16,s); sh->len = initlen; sh->alloc = initlen; *fp = type; break; } case SDS_TYPE_32: { SDS_HDR_VAR(32,s); sh->len = initlen; sh->alloc = initlen; *fp = type; break; } case SDS_TYPE_64: { SDS_HDR_VAR(64,s); sh->len = initlen; sh->alloc = initlen; *fp = type; break; } } //根据init与initlen,将内容复制给字符串 if (initlen && init) memcpy(s, init, initlen); //打上结尾符 s[initlen] = '\0'; return s; }
sdsnewlen根据参数给予的init字符串与initlen初始长度,生成并返回一个动态字符串。代码都打上了注释,阅读已经没什么难度了。
相关推荐:
The above is the detailed content of Redis source code analysis. For more information, please follow other related articles on the PHP Chinese website!