您的位置:首页 > 数据库

数据库原理学习经验(B树与索引)

2018-01-12 16:41 302 查看
step 1:

数据库的最简单实现

重点理解为什么要用B树,为什么要有索引

step 2:

实现一个B树,代码的讲解可参考下面的链接:

http://blog.csdn.net/qifengzou/article/details/21079325

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
/* One B-tree node: a key array plus links to its children and its parent. */
typedef struct _btree_node_t
{
int num;                        /* Number of keys currently stored in this node */
int *key;                       /* Keys: allocated with max+1 slots; the spare slot is scratch space (a node briefly holds max+1 keys before it is split) */
struct _btree_node_t **child;   /* Children: allocated with max+2 slots; the spare slot is scratch space for the same reason */
struct _btree_node_t *parent;   /* Parent node (NULL for the root) */
}btree_node_t;

/* B-tree handle: per-node limits derived from the order m, plus the root pointer. */
typedef struct
{
int max;                        /* Maximum number of keys per node; the order is m = max + 1 */
int min;                        /* Minimum number of keys per node (the root is exempt in btree_merge) */
int sidx;                       /* Split index = (max+1)/2: the key at this index moves up when a node is split */
btree_node_t *root;             /* Root node of the B-tree; NULL while the tree is empty */
}btree_t;

/* Forward declarations: btree_merge and _btree_merge call each other, so both are declared before their definitions. */
static int btree_merge(btree_t *btree, btree_node_t *node);
static int _btree_merge(btree_t *btree, btree_node_t *left, btree_node_t *right, int mid);
/*
 * Allocate an empty node sized for `btree`.
 *
 * The node itself, its key array (max+1 ints) and its child array
 * (max+2 pointers) are all obtained with calloc and are therefore
 * zero-filled, so the new node starts with num == 0.
 *
 * Returns the new node, or NULL if any allocation fails. On failure the
 * error is logged to stderr and every partial allocation is released.
 */
static btree_node_t *btree_creat_node(btree_t *btree)
{
    btree_node_t *node = NULL;

    node = (btree_node_t *)calloc(1, sizeof(btree_node_t));
    if(NULL == node) {
        fprintf(stderr, "[%s][%d] errmsg:[%d] %s\n", __FILE__, __LINE__, errno, strerror(errno));
        return NULL;
    }

    /* One slot more than max: scratch space while the node overflows */
    node->key = (int *)calloc(btree->max+1, sizeof(int));
    if(NULL == node->key) {
        /* Log before freeing: free() may overwrite errno on some implementations */
        fprintf(stderr, "[%s][%d] errmsg:[%d] %s\n", __FILE__, __LINE__, errno, strerror(errno));
        free(node);
        return NULL;
    }

    /* One slot more than max+1: scratch space while the node overflows */
    node->child = (btree_node_t **)calloc(btree->max+2, sizeof(btree_node_t *));
    if(NULL == node->child) {
        /* Log before freeing, for the same reason as above */
        fprintf(stderr, "[%s][%d] errmsg:[%d] %s\n", __FILE__, __LINE__, errno, strerror(errno));
        free(node->key);
        free(node);
        return NULL;
    }

    return node;
}

btree_t* btree_creat(int m)
{
btree_t *btree = NULL;

if(m < 3) {
fprintf(stderr, "[%s][%d] Parameter 'max' must geater than 2.\n", __FILE__, __LINE__);
return NULL;
}

btree = (btree_t *)calloc(1, sizeof(btree_t));
if(NULL == btree) {
fprintf(stderr, "[%s][%d] errmsg:[%d] %s!\n", __FILE__, __LINE__, errno, strerror(errno));
return NULL;
}

btree->max= m - 1;
btree->min = m/2;
if(0 != m%2) {
btree->min++;
}
btree->min--;
btree->sidx = m/2;
btree->root = NULL; /* 空树 */

return btree;
}

/*
 * Split `node`, and then every ancestor that overflows as a result, until
 * no node on the path holds more than btree->max keys.
 *
 * For each overflowing node the key at index btree->sidx moves up into the
 * parent, the keys and children to its right move into a new sibling, and
 * the keys to its left stay in place. Splitting the root creates a new root
 * above it.
 *
 * Every node needed for one round is allocated before the tree is modified,
 * so an allocation failure never leaks the sibling or drops keys.
 *
 * Returns 0 on success, -1 if a node could not be allocated (the node being
 * split is then left untouched, still holding more than max keys).
 */
static int btree_split(btree_t *btree, btree_node_t *node)
{
    int idx = 0, total = 0, sidx = btree->sidx;
    btree_node_t *parent = NULL, *node2 = NULL, *new_root = NULL;

    while(node->num > btree->max) {
        total = node->num;
        parent = node->parent;

        /* Allocate the new right sibling */
        node2 = btree_creat_node(btree);
        if(NULL == node2) {
            fprintf(stderr, "[%s][%d] Create node failed!\n", __FILE__, __LINE__);
            return -1;
        }

        /* Splitting the root also needs a new root; get it before mutating anything */
        new_root = NULL;
        if(NULL == parent) {
            new_root = btree_creat_node(btree);
            if(NULL == new_root) {
                fprintf(stderr, "[%s][%d] Create root failed!\n", __FILE__, __LINE__);
                free(node2->key);
                free(node2->child);
                free(node2);
                return -1;
            }
        }

        /* Move the keys and children to the right of the split key into node2 */
        memcpy(node2->key, node->key + sidx + 1, (total-sidx-1) * sizeof(int));
        memcpy(node2->child, node->child+sidx+1, (total-sidx) * sizeof(btree_node_t *));
        node2->num = (total - sidx - 1);

        node->num = sidx;

        if(NULL != new_root) {
            /* Root split: the new root holds only the promoted key */
            parent = new_root;
            btree->root = parent;
            parent->child[0] = node;
            node->parent = parent;

            parent->key[0] = node->key[sidx];
            parent->child[1] = node2;
            parent->num++;
        }
        else {
            /* Shift larger keys (and their right children) up to open a slot in the parent */
            for(idx=parent->num; idx>0; idx--) {
                if(node->key[sidx] < parent->key[idx-1]) {
                    parent->key[idx] = parent->key[idx-1];
                    parent->child[idx+1] = parent->child[idx];
                    continue;
                }
                break;
            }

            parent->key[idx] = node->key[sidx];
            parent->child[idx+1] = node2;
            parent->num++;
        }
        node2->parent = parent;

        /* Clear the slots that no longer belong to node */
        memset(node->key+sidx, 0, (total - sidx) * sizeof(int));
        memset(node->child+sidx+1, 0, (total - sidx) * sizeof(btree_node_t *));

        /* The children that moved now have node2 as their parent */
        for(idx=0; idx<=node2->num; idx++) {
            if(NULL != node2->child[idx]) {
                node2->child[idx]->parent = node2;
            }
        }

        /* The parent gained a key; check it on the next iteration */
        node = parent;
    }

    return 0;
}

/*
 * Insert `key` at position `idx` of `node` and split the node if it now
 * holds more than btree->max keys.
 *
 * Only keys are shifted, never child pointers: btree_insert descends until
 * the target child slot is NULL before calling this function.
 *
 * Returns 0 when no split was needed, otherwise the result of btree_split.
 */
static int _btree_insert(btree_t *btree, btree_node_t *node, int key, int idx)
{
    int pos = node->num;

    /* Open a gap at idx by moving the larger keys one slot to the right */
    while (pos > idx) {
        node->key[pos] = node->key[pos - 1];
        --pos;
    }

    node->key[idx] = key;
    node->num += 1;

    /* Rebalance only if the node overflowed */
    return (node->num > btree->max) ? btree_split(btree, node) : 0;
}

/*
 * Insert `key` into the tree.
 *
 * An empty tree gets a fresh root holding just this key. Otherwise the tree
 * is descended with a linear scan per node (a binary search would also
 * work) until a node with no child at the target slot is reached, and the
 * key is inserted there.
 *
 * Returns 0 on success or when the key is already present (a notice is
 * printed to stderr and the tree is left unchanged), -1 if the first node
 * cannot be allocated, otherwise the result of _btree_insert.
 */
int btree_insert(btree_t *btree, int key)
{
    btree_node_t *cur = btree->root;
    int slot = 0;

    /* 1. Empty tree: the key becomes the sole content of a new root */
    if (NULL == cur) {
        cur = btree_creat_node(btree);
        if (NULL == cur) {
            fprintf(stderr, "[%s][%d] Create node failed!\n", __FILE__, __LINE__);
            return -1;
        }

        cur->key[0] = key;
        cur->num = 1;
        cur->parent = NULL;
        btree->root = cur;
        return 0;
    }

    /* 2. Walk down to the node that should receive the key */
    for (;;) {
        /* First slot whose key is not smaller than the new key */
        slot = 0;
        while (slot < cur->num && key > cur->key[slot]) {
            slot++;
        }

        if (slot < cur->num && key == cur->key[slot]) {
            fprintf(stderr, "[%s][%d] The node is exist!\n", __FILE__, __LINE__);
            return 0;
        }

        if (NULL == cur->child[slot]) {
            break;
        }
        cur = cur->child[slot];
    }

    /* 3. Insert into that node */
    return _btree_insert(btree, cur, key, slot);
}
/*
 * Merge `right` into `left`, pulling down the parent key at index `mid`
 * that separates them.
 *
 * `left` and `right` must be adjacent children of the same parent
 * (parent->child[mid] and parent->child[mid+1]); the caller checks that the
 * combined keys fit in one node. After the merge `right` is released
 * together with its key and child arrays, and the parent holds one key
 * fewer. If that drops the parent below btree->min, btree_merge is invoked
 * on the parent.
 *
 * Returns 0, or the result of the recursive btree_merge call.
 */
static int _btree_merge(btree_t *btree, btree_node_t *left, btree_node_t *right, int mid)
{
    int m = 0;
    btree_node_t *parent = left->parent;

    /* The separator key moves down to the end of left */
    left->key[left->num++] = parent->key[mid];

    /* Append right's keys and children to left */
    memcpy(left->key + left->num, right->key, right->num*sizeof(int));
    memcpy(left->child + left->num, right->child, (right->num+1)*sizeof(btree_node_t *));
    for(m=0; m<=right->num; m++) {
        if(NULL != right->child[m]) {
            right->child[m]->parent = left;
        }
    }
    left->num += right->num;

    /* Close the gap left in the parent by the separator and by right's slot */
    for(m=mid; m<parent->num-1; m++) {
        parent->key[m] = parent->key[m+1];
        parent->child[m+1] = parent->child[m+2];
    }

    parent->key[m] = 0;
    parent->child[m+1] = NULL;
    parent->num--;

    /* Release the node and both arrays it owns, not just the struct */
    free(right->key);
    free(right->child);
    free(right);

    /* The parent lost a key and may now be under-full itself */
    if(parent->num < btree->min) {
        return btree_merge(btree, parent);
    }

    return 0;
}

/*
 * Repair `node` after it has dropped below btree->min keys.
 *
 * Root (no parent): there is nothing to rebalance. If the root has become
 * empty it is released (including its key and child arrays) and its first
 * child, if any, becomes the new root; with no child the tree becomes empty.
 *
 * Any other node is paired with a sibling: the left sibling when node is
 * its parent's last child, otherwise the right sibling. If both nodes plus
 * the separating parent key fit in one node they are merged by
 * _btree_merge; if not, one key is rotated from the sibling through the
 * parent into node.
 *
 * Returns 0 on success, -1 if node is not found among its parent's
 * children, or the result of _btree_merge.
 */
static int btree_merge(btree_t *btree, btree_node_t *node)
{
    int idx = 0, m = 0, mid = 0;
    btree_node_t *parent = node->parent, *right = NULL, *left = NULL;

    /* 1. node is the root: no merge is needed */
    if(NULL == parent) {
        if(0 == node->num) {
            if(NULL != node->child[0]) {
                btree->root = node->child[0];
                node->child[0]->parent = NULL;
            }
            else {
                btree->root = NULL;
            }
            /* Release the arrays the node owns as well as the node itself */
            free(node->key);
            free(node->child);
            free(node);
        }
        return 0;
    }

    /* 2. Find which child slot of the parent holds node */
    for(idx=0; idx<=parent->num; idx++) {
        if(parent->child[idx] == node) {
            break;
        }
    }

    if(idx > parent->num) {
        fprintf(stderr, "[%s][%d] Didn't find node in parent's children array!\n", __FILE__, __LINE__);
        return -1;
    }
    /* 3. node is the last child (left < node): node plays the right-hand role */
    else if(idx == parent->num) {
        mid = idx - 1;
        left = parent->child[mid];

        /* 1) Merge when everything fits in one node */
        if((node->num + left->num + 1) <= btree->max) {
            return _btree_merge(btree, left, node, mid);
        }

        /* 2) Otherwise borrow the left sibling's last key: shift node's content right by one */
        for(m=node->num; m>0; m--) {
            node->key[m] = node->key[m - 1];
            node->child[m+1] = node->child[m];
        }
        node->child[1] = node->child[0];

        /* The separator comes down; the sibling's last child comes across */
        node->key[0] = parent->key[mid];
        node->num++;
        node->child[0] = left->child[left->num];
        if(NULL != left->child[left->num]) {
            left->child[left->num]->parent = node;
        }

        /* The sibling's last key goes up as the new separator */
        parent->key[mid] = left->key[left->num - 1];
        left->key[left->num - 1] = 0;
        left->child[left->num] = NULL;
        left->num--;
        return 0;
    }

    /* 4. node is not the last child (node < right): node plays the left-hand role */
    mid = idx;
    right = parent->child[mid + 1];

    /* 1) Merge when everything fits in one node */
    if((node->num + right->num + 1) <= btree->max) {
        return _btree_merge(btree, node, right, mid);
    }

    /* 2) Otherwise borrow the right sibling's first key: the separator comes down to the end of node */
    node->key[node->num++] = parent->key[mid];
    node->child[node->num] = right->child[0];
    if(NULL != right->child[0]) {
        right->child[0]->parent = node;
    }

    /* The sibling's first key goes up; its remaining content shifts left by one */
    parent->key[mid] = right->key[0];
    for(m=0; m<right->num; m++) {
        right->key[m] = right->key[m+1];
        right->child[m] = right->child[m+1];
    }
    right->child[m] = NULL;
    right->num--;
    return 0;
}

/*
 * Remove the key at position `idx` of `node`.
 *
 * Internal node: the key is overwritten with its in-order predecessor (the
 * largest key in the subtree node->child[idx]), and that predecessor is
 * then removed from the leaf it came from.
 *
 * Leaf: the keys after idx are shifted one slot to the left so the
 * remaining keys stay in ascending order.
 *
 * Either way the removal ends in a leaf; if that leaf drops below
 * btree->min keys, btree_merge rebalances it.
 *
 * Returns 0, or the result of btree_merge.
 */
static int _btree_delete(btree_t *btree, btree_node_t *node, int idx)
{
    int m = 0;
    btree_node_t *orig = node, *child = node->child[idx];

    /* Descend to the right-most leaf of the subtree left of the key */
    while(NULL != child) {
        node = child;
        child = node->child[node->num];
    }

    if(node == orig) {
        /* Leaf: close the gap rather than overwriting with the last key */
        for(m=idx; m<node->num-1; m++) {
            node->key[m] = node->key[m+1];
        }
    }
    else {
        /* Internal node: replace the deleted key with its predecessor */
        orig->key[idx] = node->key[node->num - 1];
    }

    /* In both cases the leaf's last slot is now free */
    node->key[--node->num] = 0;
    if(node->num < btree->min) {
        return btree_merge(btree, node);
    }

    return 0;
}

/*
 * Delete `key` from the tree.
 *
 * Descends from the root with a linear scan per node. When the key is
 * found the removal is delegated to _btree_delete; when the search runs
 * off the bottom of the tree nothing is changed.
 *
 * Returns the result of _btree_delete if the key was found, 0 otherwise.
 */
int btree_delete(btree_t *btree, int key)
{
    btree_node_t *cur;
    int slot = 0;

    for (cur = btree->root; cur != NULL; cur = cur->child[slot]) {
        /* First slot whose key is not smaller than the searched key */
        slot = 0;
        while (slot < cur->num && key > cur->key[slot]) {
            slot++;
        }

        if (slot < cur->num && key == cur->key[slot]) {
            return _btree_delete(btree, cur, slot);
        }
    }

    return 0;
}

/*
 * Print the subtree rooted at `root` to stdout.
 *
 * Each node is written as "< num | key key ... >", prefixed by one "---"
 * per level of depth, and is printed before its children (despite the
 * function's name, a node's keys are not interleaved with its subtrees).
 * `deep` is the depth of `root`; callers pass 0 for the tree root.
 * A NULL `root` prints nothing.
 */
void Inorder(btree_node_t *root,int deep){
    int slot, level, pos;

    if (NULL == root) {
        return;
    }

    /* Nodes below the top level start on a fresh line */
    if (deep) {
        printf("\n");
    }
    for (level = 0; level < deep; level++) {
        printf("---");
    }

    for (slot = 0; slot <= root->num; slot++) {
        /* The node's own summary is emitted once, ahead of its first child */
        if (0 == slot) {
            printf("< %d | ", root->num);
            for (pos = 0; pos < root->num; pos++) {
                printf("%d ", root->key[pos]);
            }
            printf(">");
        }
        Inorder(root->child[slot], deep + 1);
    }
    printf("\n");
}

/*
 * Demo driver: builds an order-4 B-tree from a fixed list of keys, printing
 * the tree after every insertion, then deletes the first ten keys again,
 * printing the tree after every deletion.
 *
 * Returns 0 on success, 1 if the tree cannot be created or an insert or
 * delete reports failure. The tree is not freed before exit.
 */
int main(void){
    btree_t *bt;
    int i;
    int a[]={3,4,44,12,67,98,32,43,24,100,34,55,33,13,25,8,5,41,77,200};
    const int insert_count = (int)(sizeof(a) / sizeof(a[0]));
    const int delete_count = 10;

    bt = btree_creat(4);
    if(NULL == bt) {
        /* btree_insert dereferences the handle, so stop here */
        fprintf(stderr, "[%s][%d] Create btree failed!\n", __FILE__, __LINE__);
        return 1;
    }

    for(i = 0;i < insert_count;i++){
        printf("insert %d: %d\n",i+1,a[i]);
        if(0 != btree_insert(bt,a[i])) {
            fprintf(stderr, "[%s][%d] Insert %d failed!\n", __FILE__, __LINE__, a[i]);
            return 1;
        }
        Inorder(bt->root,0);
        printf("\n");
    }

    for(i = 0;i < delete_count;i++){
        printf("delete %d: %d\n",i+1,a[i]);
        if(0 != btree_delete(bt,a[i])) {
            fprintf(stderr, "[%s][%d] Delete %d failed!\n", __FILE__, __LINE__, a[i]);
            return 1;
        }
        Inorder(bt->root,0);
    }

    return 0;
}


以上代码在 Ubuntu 16 上用 gcc 编译通过,其中 main 函数演示了依次插入和删除关键字的过程。

step 3:

理解索引与主键的联系与区别

深入浅出数据库索引原理

step 4:

总结以及提升

MySQL索引背后的数据结构及算法原理
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: