Home > Backend Development > PHP Tutorial > nginx data structure 2 - rewrite the red-black tree yourself

nginx data structure 2 - rewrite the red-black tree yourself

WBOY
Release: 2016-07-30 13:31:21
Original
992 people have browsed it

Without further ado, let’s rewrite the code. This time I will use the comments written in English as a review of English.

rbtree.h:

/*
 * Copyright (C) Bipedal Bit
 * Verson 1.0.0.1
 */

#ifndef _RBTREE_H_INCLUDED_
#define _RBTREE_H_INCLUDED_

/* the node structure of the red-black tree */
typedef struct rbtree_node_s rbtree_node_t;
/* Using type int means its range is -0x7fffffff-1~0x7fffffff. */
typedef int rbtree_key_t;
/* Abstract type is complicated to achieve with C so I use char* instead. */
typedef char* rbtree_data_t;

struct rbtree_node_s
{
	/* key of the node */
	rbtree_key_t	key;
	/* pointer of the parent of the node */
	rbtree_node_t*	parent;
	/* pointer of the left kid of the node */
	rbtree_node_t*	left;
	/* pointer of the right kid of the node */
	rbtree_node_t*	right;
	/* color of the node */
	unsigned char	color;
	/* pointer of the value of the node corresponding to the key */
	rbtree_data_t	value;
};

/* the tree object stucture of the red-black tree */
typedef struct rbtree_s rbtree_t;
/* foundational insert function pointer*/
typedef void (*rbtree_insert_p) (rbtree_t* root, rbtree_node_t* node);

struct rbtree_s
{
	/* the pointer of the root node of the tree */
	rbtree_node_t* root;
	/* black leaf nodes as sentinel */
	rbtree_node_t* sentinel;
	/* the polymorphic insert function pointer */
	rbtree_insert_p insert;
};

/* macros */
#define rbtree_init(tree, s, i)		\
rbtree_sentinel_init(s);			\
(tree)->root = s;				\
(tree)->sentinel = s;			\
(tree)->insert = i

#define rbtree_red(node)	((node)->color = 1)
#define rbtree_black(node)	((node)->color = 0)
#define rbtree_is_red(node)	((node)->color)
#define rbtree_is_black(node)	(!rbtree_is_red(node))
 /* copy n2's color to n1 */
#define rbtree_copy_color(n1, n2)	(n1->color = n2->color)
/* sentinel must be black cuz it's leaf node */
#define rbtree_sentinel_init(node)	rbtree_black(node)

/* statements of public methods */
void rbtree_insert_value(rbtree_t* tree, rbtree_node_t* node);
void rbtree_insert(rbtree_t* tree, rbtree_node_t* node);
void rbtree_delete(rbtree_t* tree, rbtree_node_t* node);
rbtree_node_t* rbtree_find(rbtree_t* tree, rbtree_key_t key);

#endif	/* _RBTREE_H_INCLUDED_ */
Copy after login

Those who have read the nginx source code will find that my header file has not changed much compared to ngx_rbree.h, it is very similar.

The key rbtree.c:

/*
 * Copyright (C) Bipedal Bit
 * Verson 1.0.0.1
 */

#include <stddef.h>
#include "rbtree.h"

/* inline methods */
/* get the node with the minimum key in a subtree of the red-black tree */
static inline rbtree_node_t*
rbtree_subtree_min(rbtree_node_t* node, rbtree_node_t* sentinel)
{
    while(node->left != sentinel)
    {
        node = node->left;
    }

    return node;
}

/* replace the node "node" in the tree with node "tmp" */
static inline void rbtree_replace(rbtree_t* tree,
    rbtree_node_t* node, rbtree_node_t* tmp)
{
    /* upward: p[node] <- p[tmp] */
&#160;&#160; &#160;tmp->parent = node->parent;

    if (node == tree->root)
    {
        tree->root = tmp;
    }
    else if (node == node->parent->left)
    {
        /* downward: left[p[node]] <- tmp */
&#160;&#160; &#160;&#160;&#160; &#160;node->parent->left = tmp;
    }
    else
    {
        /* downward: right[p[node]] <- tmp */
&#160;&#160; &#160;&#160;&#160; &#160;node->parent->right = tmp;
    }

    node->parent = tmp;
}

/* change the topologic structure of the tree keeping the order of the nodes */
static inline void rbtree_left_rotate(rbtree_t* tree, rbtree_node_t* node)
{
    /* node as the var x in CLRS while tmp as the var y */
    rbtree_node_t* tmp = node->right;

    /* replace y with left[y] */
    /* downward: right[x] <- left[y] */
&#160;&#160; &#160;node->right = tmp->left;
    /* if left[[y] is not NIL it has a parent */
    if (tmp->left != tree->sentinel)
    {
        /* upward: p[left[y]] <- x */
&#160;&#160; &#160;&#160;&#160; &#160;tmp->left->parent = node;
    }

    /* replace x with y */
    rbtree_replace(tree, node, tmp);
    tmp->left = node;
}

static inline void rbtree_right_rotate(rbtree_t* tree, rbtree_node_t* node)
{
    rbtree_node_t* tmp = node->left;

    /* replace y with right[y] */
    node->left = tmp->right;
    if (tmp->right != tree->sentinel)
    {
        tmp->right->parent = node;
    }

    /* replace x with y */
    rbtree_replace(tree, node, tmp);
    tmp->right = node;
}

/* static methods */
/* fix the red-black tree after the new node inserted */
static void rbtree_insert_fixup(rbtree_t* tree, rbtree_node_t* node)
{
    while(rbtree_is_red(node->parent))
    {
        if (node->parent == node->parent->parent->left)
        {
            /* case 1: node's uncle is red */
            if (rbtree_is_red(node->parent->parent->right))
            {
                rbtree_black(node->parent);
                rbtree_black(node->parent->parent->right);
                rbtree_red(node->parent->parent);
                node = node->parent->parent;
                /* Then we can consider the whole subtree */
                /* which is represented by the new "node" as the "node" before */
                /* and keep looping till "node" become the root. */
            }
            /* case 2: node's uncle is black */
            else
            {
                /* ensure node is the left kid of its parent */
                if (node == node->parent->right)
                {
                    node = node->parent;
                    rbtree_left_rotate(tree, node);
                }
                /* case 2 -> case 1 */
                rbtree_black(node->parent);
                rbtree_red(node->parent->parent);
                rbtree_right_rotate(tree, node->parent->parent);
            }
        }
        /* same as the "if" clause before with "left" and "right" exchanged */
        else
        {
            if (rbtree_is_red(node->parent->parent->left))
            {
                rbtree_black(node->parent);
                rbtree_black(node->parent->parent->left);
                rbtree_red(node->parent->parent);
                node = node->parent->parent;
            }
            else
            {
                if (node == node->parent->left)
                {
                    node = node->parent;
                    rbtree_right_rotate(tree, node);
                }
                rbtree_black(node->parent);
                rbtree_red(node->parent->parent);
                rbtree_left_rotate(tree, node->parent->parent);
            }
        }
    }
    /* ensure the root node being black */
    rbtree_black(tree->root);
}

static void rbtree_delete_fixup(rbtree_t* tree, rbtree_node_t* node)
{
    rbtree_node_t* brother = NULL;

    while(node != tree->root && rbtree_is_black(node))
    {
        if (node == node->parent->left)
        {
            brother = node->parent->right;
            if (rbtree_is_red(brother))
            {
                rbtree_black(brother);
                rbtree_red(node->parent);
                rbtree_left_rotate(tree, node->parent);
                /* update brother after topologic change of the tree */
                brother = node->parent->right;
            }

            if (rbtree_is_black(brother->left) && rbtree_is_black(brother->right))
            {
                rbtree_red(brother);
                /* go upward and keep on fixing color */
                node = node->parent;
            }
            else
            {
                if (rbtree_is_black(brother->right))
                {
                    rbtree_black(brother->left);
                    rbtree_red(brother);
                    rbtree_right_rotate(tree, brother);
                    /* update brother after topologic change of the tree */
                    brother = node->parent->right;
                }
                rbtree_copy_color(brother, node->parent);
                rbtree_black(node->parent);
                rbtree_black(brother->right);
                rbtree_left_rotate(tree, node->parent);
                /* end the loop and ensure root is black */
                node = tree->root;
            }
        }
        /* same as the "if" clause before with "left" and "right" exchanged */
        else
        {
            brother = node->parent->left;
            if (rbtree_is_red(brother))
            {
                rbtree_black(brother);
                rbtree_red(node->parent);
                rbtree_left_rotate(tree, node->parent);
                brother = node->parent->left;
            }

            if (rbtree_is_black(brother->left) && rbtree_is_black(brother->right))
            {
                rbtree_red(brother);
                node = node->parent;
            }
            else
            {
                if (rbtree_is_black(brother->left))
                {
                    rbtree_black(brother->right);
                    rbtree_red(brother);
                    rbtree_right_rotate(tree, brother);
                    brother = node->parent->left;
                }
                rbtree_copy_color(brother, node->parent);
                rbtree_black(node->parent);
                rbtree_black(brother->left);
                rbtree_left_rotate(tree, node->parent);
                node = tree->root;
            }
        }
    }

    rbtree_black(node);
}

/* public methods */
void rbtree_insert_value(rbtree_t* tree, rbtree_node_t* node)
{
    /* Using ** to know wether the new node will be a left kid */
    /* or a right kid of its parent node. */
    rbtree_node_t** tmp = &tree->root;
    rbtree_node_t* parent;

    while(*tmp != tree->sentinel)
    {
        parent = *tmp;
        tmp = (node->key < parent->key) ? &parent->left : &parent->right;
    }

    /* The pointer knows wether the node should be on the left side */
    /* or on the right one. */
    *tmp = node;
    node->parent = parent;
    node->left = tree->sentinel;
    node->right = tree->sentinel;
    rbtree_red(node);
}

void rbtree_insert(rbtree_t* tree, rbtree_node_t* node)
{
    rbtree_node_t* sentinel = tree->sentinel;

    /* if the tree is empty */
    if (tree->root == sentinel)
    {
        tree->root = node;
        node->parent = sentinel;
        node->left = sentinel;
        node->right = sentinel;
        rbtree_black(node);

        return;
    }

    /* generally */
    tree->insert(tree, node);
    rbtree_insert_fixup(tree, node);
}

void rbtree_delete(rbtree_t* tree, rbtree_node_t* node)
{
    rbtree_node_t* sentinel = tree->sentinel;
    /* wether "node" is on the left side or the right one */
    rbtree_node_t** ptr_to_node = NULL;
    /* "cover" is the node which is going to cover "node" */
    rbtree_node_t* cover = NULL;
    /* wether we lossing a red node on the edge of the tree */
    int loss_red = rbtree_is_red(node);
    int is_root = (node == tree->root);

    /* get "cover" & "loss_red"  */
    /* sentinel in "node"'s kids */
    if (node->left == sentinel)
    {
        cover = node->right;
    }
    else if (node->right == sentinel)
    {
        cover = node->left;
    }
    /* "node"'s kids are both non-sentinel */
    else
    {
        /* update "node" & "loss_red" & "is_root" & "cover" */
        cover = rbtree_subtree_min(node->right, sentinel);
        node->key = cover->key;
        node->value = cover->value;
        node = cover;
        loss_red = rbtree_is_red(node);
        is_root = 0;
        /* move "cover"'s kids */
        /* "cover" can only be a left kid */
        /* and can only have a right non-sentinel kid */
        /* because of function "rbtree_subtree_min" */
        cover = node->right;
    }

    if (is_root)
    {
        /* update root */
        tree->root = cover;
    }
    else
    {
        /* downward link */
        if (node == node->parent->left)
        {
            node->parent->left = cover;
        }
        else
        {
            node->parent->right = cover;
        }
    }
    /* upward link */
    cover->parent = node->parent;
    /* "cover" may be a sentinel */
    if (cover != sentinel)
    {
        /* set "cover" */
        cover->left = node->left;
        cover->right = node->right;
        rbtree_copy_color(cover, node);
    }

    /* clear "node" since it's useless */
    node->key = -1;
    node->parent = NULL;
    node->left = NULL;
    node->right = NULL;
    node->value = NULL;

    if (loss_red)
    {
        return;
    }

    /* When lossing a black node on edge */
    /* the fifth rule of red-black tree will be broke. */
    /* So the tree need to be fixed. */
    rbtree_delete_fixup(tree, cover);
}

/* find the node in the tree corresponding to the given key value */
rbtree_node_t* rbtree_find(rbtree_t* tree, rbtree_key_t key)
{
    rbtree_node_t* tmp = tree->root;
    int step_cnt = 0;

    /* search the binary tree */
    while(tmp != tree->sentinel)
    {
        /* next line is just fot test */
        // step_cnt++;
        if(key == tmp->key)
        {
            /* next line is just for test */
            // printf("step count: %d, color: %s, ", step_cnt, rbtree_is_red(tmp) ? "red" : "black");
            return tmp;
        }

        tmp = (key < tmp->key) ? tmp->left : tmp->right;
    }

    return NULL;
}
 
Copy after login

Although I understand that the long function body of 100+ lines in the nginx source code is also an optimization to avoid too many function calls that increase time and space overhead, I still classify all functions into 100 lines the following. Increasing readability is one thing, but it may also be a bit obsessive-compulsive. Later, several statistical methods will be expanded, such as max, min and mid, and a traversal method will also be expanded.

The following is the call test, test.c:

#include <stdio.h>
#include "rbtree.h"

int main(int argc, char const *argv[])
{
    rbtree_t t = {};
    rbtree_node_t s = {};
    rbtree_init(&t, &s, rbtree_insert_value);

    const int cnt = 10;
    const int max_len = 15;

#define TEST_VALUES {"apple", "banana", "cherry", "grape", "lemon", "mango", "pear", "pineapple", "strawberry", "watermelon"}

    /* for gcc */
    char* v[] = TEST_VALUES;
    /* for g++ */
    // char v[][max_len] = TEST_VALUES;

    rbtree_node_t n[cnt];
    int i;
    for (i = 0; i < cnt; i++)
&#160;&#160; &#160;{
&#160;&#160; &#160;&#160;&#160; &#160;n[i].key = i+1;
&#160;&#160; &#160;&#160;&#160; &#160;n[i].value = v[i];
&#160;&#160; &#160;&#160;&#160; &#160;rbtree_insert(&t, &n[i]);
&#160;&#160; &#160;}

&#160;&#160; &#160;rbtree_node_t* p[cnt];

&#160;&#160; &#160;for (i = 1; i <= cnt; i++)
&#160;&#160; &#160;{
&#160;&#160; &#160;&#160;&#160; &#160;printf("key: %d\n", i);
&#160;&#160; &#160;&#160;&#160; &#160;p[i] = rbtree_find(&t, i);
&#160;&#160; &#160;&#160;&#160; &#160;printf("value: %s\n", (p[i] != NULL) ? p[i]->value : "?");
    }

    rbtree_delete(&t, &n[5]);

    printf("\nafter delete 6->mango:\n\n");

    for (i = 1; i <= cnt; i++)
&#160;&#160; &#160;{
&#160;&#160; &#160;&#160;&#160; &#160;printf("key: %d\n", i);
&#160;&#160; &#160;&#160;&#160; &#160;p[i] = rbtree_find(&t, i);
&#160;&#160; &#160;&#160;&#160; &#160;printf("value: %s\n", (p[i] != NULL) ? p[i]->value : "?");
    }

    return 0;
}
Copy after login

Unlock the test line comment in the rbtree_find method and execute it smoothly:

key: 1
step count: 3, color: black, value: apple
key: 2
step count: 2, color: black, value: banana
key: 3
step count: 3, color: black, value: cherry
key: 4
step count: 1, color: black, value: grape
key: 5
step count: 3, color: black, value: lemon
key: 6
step count: 2, color: black, value: mango
key: 7
step count: 4, color: black, value: pear
key: 8
step count: 3, color: red, value: pineapple
key: 9
step count: 4, color: black, value: strawberry
key: 10
step count: 5, color: red, value: watermelon

after delete 6->mango:

key: 1
step count: 3, color: black, value: apple
key: 2
step count: 2, color: black, value: banana
key: 3
step count: 3, color: black, value: cherry
key: 4
step count: 1, color: black, value: grape
key: 5
step count: 3, color: black, value: lemon
key: 6
value: ?
key: 7
step count: 2, color: black, value: pear
key: 8
step count: 4, color: black, value: pineapple
key: 9
step count: 3, color: red, value: strawberry
key: 10
step count: 4, color: black, value: watermelon
Copy after login
The following are the red-black tree before deleting 6->mango and the red tree after deletion Black tree diagram:

Let’s do a stress test on a large amount of data. Be careful to comment out the test lines in the rbtree_find method, otherwise the consequences may be scary:

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "rbtree.h"

int main(int argc, char const *argv[])
{
    double duration;
    double room;

    rbtree_t t = {};
    rbtree_node_t s = {};
    rbtree_init(&t, &s, rbtree_insert_value);

    const int cnt = 1<<20;
    const int max_len = 15;

#define TEST_VALUES {"apple", "banana", "cherry", "grape", "lemon", "mango", "pear", "pineapple", "strawberry", "watermelon"}

    /* for gcc */
    char* v[] = TEST_VALUES;
    /* for g++ */
    // char v[][max_len] = TEST_VALUES;

    /* Default stack size in Ubuntu Kylin 14.04 is 8MB. */
    /* It's not enough. So I use memory in heap which offers a lot larger room. */
    rbtree_node_t* n = (rbtree_node_t*)calloc(cnt, sizeof(rbtree_node_t));
    int i;

    long time1 = clock();

    for (i = 0; i < cnt; i++)
    {
        n[i].key = i+1;
        n[i].value = v[i%10];
        rbtree_insert(&t, &n[i]);
    }

    long time2 = clock();
    room = 48.0*cnt/(1<<20);
    duration = (double)(time2 - time1) / CLOCKS_PER_SEC;
    printf("Inserting %d nodes costs %.2fMB and spends %f seconds.\n", cnt, room, duration);

    const int search_cnt = 1<<10;
    srand( (unsigned int)time(0) );
    for( i = 0 ; i < search_cnt ; i++ )
    {
        rbtree_find(&t, (rand()%cnt)+1);
    }

    long time3 = clock();
    duration = (double)(time3 - time2) / CLOCKS_PER_SEC;
    printf("Searching %d nodes among %d spends %f seconds.\n", search_cnt, cnt, duration);

    const int delete_cnt = 1<<10;
    int nums[delete_cnt];
    int num;
    /* Let's hash! */
    char* mark = (char*)calloc(cnt, sizeof(char));
    memset(mark, 0, cnt*sizeof(char));
    for(i = 0; i < delete_cnt; i++)
    {
        for(;;)
        {
            num = rand()%cnt;
            if (mark[num] == 0)
            {
                mark[num] = 1;
                nums[i] = num;
                break;
            }
        }
    }

    long time4 = clock();
    duration = (double)(time4 - time3) / CLOCKS_PER_SEC;
    printf("Hash %d times spends %f seconds.\n", delete_cnt, duration);

    for(i = 0; i < delete_cnt; i++)
    {
        rbtree_delete(&t, &n[nums[i]]);
    }

    long time5 = clock();
    duration = (double)(time5 - time4) / CLOCKS_PER_SEC;
    printf("Deleting %d nodes among %d spends %f seconds.\n", delete_cnt, cnt, duration);
    free(mark);
    free(n);

    return 0;
}
Copy after login

Let’s take a look at the results:

Inserting 1048576 nodes costs 48.00MB and spends 0.425416 seconds.
Searching 1024 nodes among 1048576 spends 0.001140 seconds.
Hash 1024 times spends 0.000334 seconds.
Deleting 1024 nodes among 1048576 spends 0.000783 seconds.
Copy after login
Deletion is faster than search, and it only takes a little more than twice as long as hash search. Millions of insertions also take less than half a second. Well, I am quite satisfied.

I am going to write statistics and traversal methods.

Copyright Statement: This article is an original article by the blogger and may not be reproduced without the blogger's permission.

The above introduces the nginx data structure 2 - rewriting the red-black tree by yourself, including the relevant content. I hope it will be helpful to friends who are interested in PHP tutorials.

Related labels:
source:php.cn
Statement of this Website
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn
Popular Tutorials
More>
Latest Downloads
More>
Web Effects
Website Source Code
Website Materials
Front End Template