levelDB源码分析-Skiplist
2012-06-14 16:19
344 查看
这里主要介绍levelDB中SkipList的实现。关于SkipList本身的原理介绍,请参阅《SkipList》一文,这里不再赘述。
在levelDB中的使用:
levelDB中Memtable有一个核心的数据结构Skiplist,具体实现的代码稍有不同,但是基本原理是一致的。
levelDB中Skiplist定义为模板类:
结论
作为一种简单的数据结构,SkipList算法非常容易实现。在大多数应用中SkipList能够代替平衡树,SkipList和进行过优化的平衡树有着同样高的性能,性能远远超过未经优化的平衡二叉树。
代码编程技巧:可以采用内部类或者友元类定义迭代器iterator;
在levelDB中的使用:
levelDB中Memtable有一个核心的数据结构Skiplist,具体实现的代码稍有不同,但是基本原理是一致的。
levelDB中Skiplist定义为模板类:
// description: // Thread safety // Writes require external synchronization, most likely a mutex. // Reads require a guarantee that the SkipList will not be destroyed while the read is in progress. Apart from that, reads progress without any internal locking or synchronization. // // Invariants: // (1) Allocated nodes are never deleted until the SkipList is destroyed. This is trivially guaranteed by the code since we never delete any skip list nodes. // (2) The contents of a Node except for the next/prev pointers are immutable after the Node has been linked into the SkipList. Only Insert() modifies the list, and it is careful to initialize a node and use release-stores to publish the nodes in one or more lists. template<typename Key, class Comparator> class SkipList { private: struct Node; public: // Create a new SkipList object that will use "cmp" for comparing keys, and will allocate memory using "*arena". Objects allocated in the arena must remain allocated for the lifetime of the skiplist object. explicit SkipList(Comparator cmp, Arena* arena); // Insert key into the list. // REQUIRES: nothing that compares equal to key is currently in the list. void Insert(const Key& key); // 插入一个key到Skiplist中 // Returns true iff an entry that compares equal to key is in the list. bool Contains(const Key& key) const; // Skiplist中key的节点是否存在 private: enum { kMaxHeight = 12 }; // 最大level // Immutable after construction Comparator const compare_; // key值的比较函数,一旦初始化就不能变化了(当插入一些数据后,改变key,状态不可控) Arena* const arena_; // Arena used for allocations of nodes // levelDB中使用的Arena内存池对象 Node* const head_; // Skiplist头结点 // Modified only by Insert(). Read racily by readers, but stale // values are ok. port::AtomicPointer max_height_; // Height of the entire list // Skiplist层数 inline int GetMaxHeight() const { // 返回Skiplist的层数 return reinterpret_cast<intptr_t>(max_height_.NoBarrier_Load()); } // Read/written only by Insert(). 
Random rnd_; // 随机器,产生随机的level层数 Node* NewNode(const Key& key, int height); // 新建一个level=height,键位key的节点 int RandomHeight(); // 随机产生一个level层数 bool Equal(const Key& a, const Key& b) const { return (compare_(a, b) == 0); } // 比较2个key是否相等 // Return true if key is greater than the data stored in "n" bool KeyIsAfterNode(const Key& key, Node* n) const; // 比较key与Node n中的key,是否key在后面 // Return the earliest node that comes at or after key. // Return NULL if there is no such node. // If prev is non-NULL, fills prev[level] with pointer to previous // node at "level" for every level in [0..max_height_-1]. Node* FindGreaterOrEqual(const Key& key, Node** prev) const; // 找到key对应的Node或是key后面紧邻的Node // Return the latest node with a key < key, return head_ if there is no such node. Node* FindLessThan(const Key& key) const; // 找到key前面紧邻的Node // Return the last node in the list. // Return head_ if list is empty. Node* FindLast() const; // Skiplist最后一个Node // No copying allowed SkipList(const SkipList&); // 拷贝构造和赋值构造操作不允许 void operator=(const SkipList&); }; // Implementation details follow template<typename Key, class Comparator> struct SkipList<Key,Comparator>::Node { // Skiplist节点Node定义 explicit Node(const Key& k) : key(k) { } Key const key; // Accessors/mutators for links. Wrapped in methods so we can add the appropriate barriers as necessary. Node* Next(int n) { assert(n >= 0); // Use an 'acquire load' so that we observe a fully initialized version of the returned Node. return reinterpret_cast<Node*>(next_ .Acquire_Load()); } void SetNext(int n, Node* x) { assert(n >= 0); // Use a 'release store' so that anybody who reads through this pointer observes a fully initialized version of the inserted node. next_ .Release_Store(x); } // No-barrier variants that can be safely used in a few locations. 
Node* NoBarrier_Next(int n) { assert(n >= 0); return reinterpret_cast<Node*>(next_ .NoBarrier_Load()); } void NoBarrier_SetNext(int n, Node* x) { assert(n >= 0); next_ .NoBarrier_Store(x); } private: // Array of length equal to the node height. next_[0] is lowest level link. port::AtomicPointer next_[1]; // forward数组指针 }; template<typename Key, class Comparator> typename SkipList<Key,Comparator>::Node* SkipList<Key,Comparator>::NewNode(const Key& key, int height) { // 新建一个Node节点(指定key及level层数) char* mem = arena_->AllocateAligned(sizeof(Node) + sizeof(port::AtomicPointer) * (height - 1)); return new (mem) Node(key); // 显式调用new } template<typename Key, class Comparator> SkipList<Key,Comparator>::SkipList(Comparator cmp, Arena* arena) // 构造函数 : compare_(cmp), arena_(arena), head_(NewNode(0 /* any key will do */, kMaxHeight)), // 头节点的key没有意义 max_height_(reinterpret_cast<void*>(1)), rnd_(0xdeadbeef) { for (int i = 0; i < kMaxHeight; i++) { head_->SetNext(i, NULL); // 初始化头结点 } } template<typename Key, class Comparator> int SkipList<Key,Comparator>::RandomHeight() { // 返回随机高度(Skiplist依赖于这个随机性) // Increase height with probability 1 in kBranching static const unsigned int kBranching = 4; int height = 1; while (height < kMaxHeight && ((rnd_.Next() % kBranching) == 0)) { //? 直接取一个随机数不行?为什么要循环几次? 
height++; } assert(height > 0); assert(height <= kMaxHeight); return height; } template<typename Key, class Comparator> bool SkipList<Key,Comparator>::KeyIsAfterNode(const Key& key, Node* n) const { // Return true if key is greater than the key stored in "n" // NULL n is considered infinite,NULL被视为无限大(这样就考虑了结尾的NIL) return (n != NULL) && (compare_(n->key, key) < 0); } template<typename Key, class Comparator> typename SkipList<Key,Comparator>::Node* SkipList<Key,Comparator>::FindGreaterOrEqual(const Key& key, Node** prev) const { // Node* x = head_; int level = GetMaxHeight() - 1; while (true) { Node* next = x->Next(level); if (KeyIsAfterNode(key, next)) { // key在next节点后面,如果返回true,那么肯定next不为NULL // Keep searching in this list x = next; } else { if (prev != NULL) prev[level] = x; // 当前level上,x为高度>=key节点高度,且正好排在其前面,插入和删除时使用 if (level == 0) { return next; } else { // Switch to next list( low level link list) level--; } } } } template<typename Key, class Comparator> typename SkipList<Key,Comparator>::Node* SkipList<Key,Comparator>::FindLessThan(const Key& key) const { // Return the latest node with a key < key, return head_ if there is no such node. 
Node* x = head_; int level = GetMaxHeight() - 1; while (true) { assert(x == head_ || compare_(x->key, key) < 0); // Node* next = x->Next(level); if (next == NULL || compare_(next->key, key) >= 0) { // 从最高level尽可能向后移动更远的距离 // 后面key>查找的key时,或next为空时,level--,直到level=0 if (level == 0) { return x; } else { // Switch to next list level--; // 从最高层往下后续查找 } } else { x = next; } } } template<typename Key, class Comparator> typename SkipList<Key,Comparator>::Node* SkipList<Key,Comparator>::FindLast() const { // 先从最高level走到头,然后减少level继续走到头,一直到level=0 Node* x = head_; int level = GetMaxHeight() - 1; while (true) { Node* next = x->Next(level); if (next == NULL) { if (level == 0) { return x; } else { // Switch to next list level--; } } else { x = next; } } } template<typename Key, class Comparator> void SkipList<Key,Comparator>::Insert(const Key& key) { // 插入key节点 // TODO(opt): We can use a barrier-free variant of FindGreaterOrEqual() // here since Insert() is externally synchronized. Node* prev[kMaxHeight]; Node* x = FindGreaterOrEqual(key, prev); // prev记录每个level上前一个节点 // Our data structure does not allow duplicate insertion assert(x == NULL || !Equal(key, x->key)); int height = RandomHeight(); if (height > GetMaxHeight()) { for (int i = GetMaxHeight(); i < height; i++) { prev[i] = head_; } //fprintf(stderr, "Change height from %d to %d\n", max_height_, height); // It is ok to mutate max_height_ without any synchronization // with concurrent readers. A concurrent reader that observes // the new value of max_height_ will see either the old value of // new level pointers from head_ (NULL), or a new value set in // the loop below. In the former case the reader will // immediately drop to the next level since NULL sorts after all // keys. In the latter case the reader will use the new node. 
max_height_.NoBarrier_Store(reinterpret_cast<void*>(height)); } x = NewNode(key, height); // 新建一个Node for (int i = 0; i < height; i++) { // 根据当前节点的level层数,设置每个level的指针 // NoBarrier_SetNext() suffices since we will add a barrier when we publish a pointer to "x" in prev[i]. x->NoBarrier_SetNext(i, prev[i]->NoBarrier_Next(i)); prev[i]->SetNext(i, x); } } template<typename Key, class Comparator> bool SkipList<Key,Comparator>::Contains(const Key& key) const { // Skiplist是否包含key Node* x = FindGreaterOrEqual(key, NULL); // 查找大于或等于key的节点 if (x != NULL && Equal(key, x->key)) { // 非空,且相同,表示包含 return true; } else { return false; } } // Iteration over the contents of a skiplist template<typename Key, class Comparator> class SkipList<Key,Comparator>::Iterator { // Skiplist迭代器 public: // Initialize an iterator over the specified list. // The returned iterator is not valid. explicit Iterator(const SkipList* list); // Returns true iff the iterator is positioned at a valid node. bool Valid() const; // Returns the key at the current position. // REQUIRES: Valid() const Key& key() const; // Advances to the next position. // REQUIRES: Valid() void Next(); // Advances to the previous position. // REQUIRES: Valid() void Prev(); // Advance to the first entry with a key >= target void Seek(const Key& target); // Position at the first entry in list. // Final state of iterator is Valid() iff list is not empty. void SeekToFirst(); // Position at the last entry in list. // Final state of iterator is Valid() iff list is not empty. void SeekToLast(); private: const SkipList* list_; Node* node_; // Intentionally copyable 采用默认的copy构造函数,成员直接赋值 }; template<typename Key, class Comparator> inline SkipList<Key,Comparator>::Iterator::Iterator(const SkipList* list) { // 构造函数,初始化iterator list_ = list; node_ = NULL; } template<typename Key, class Comparator> inline bool SkipList<Key,Comparator>::Iterator::Valid() const { // Returns true iff the iterator is positioned at a valid node. 
return node_ != NULL; } template<typename Key, class Comparator> inline const Key& SkipList<Key,Comparator>::Iterator::key() const { // Returns the key at the current position. assert(Valid()); return node_->key; } template<typename Key, class Comparator> inline void SkipList<Key,Comparator>::Iterator::Next() { // Advances to the next position. assert(Valid()); node_ = node_->Next(0); // 从level 0后移指向下一个 } template<typename Key, class Comparator> inline void SkipList<Key,Comparator>::Iterator::Prev() { // Advances to the previous position. // Instead of using explicit "prev" links, we just search for the // last node that falls before key. assert(Valid()); node_ = list_->FindLessThan(node_->key); // 找到前一个节点,如果为head_,则设置为NULL if (node_ == list_->head_) { node_ = NULL; } } template<typename Key, class Comparator> inline void SkipList<Key,Comparator>::Iterator::Seek(const Key& target) { // Advance to the first entry with a key >= target node_ = list_->FindGreaterOrEqual(target, NULL); } // 第一个节点 template<typename Key, class Comparator> inline void SkipList<Key,Comparator>::Iterator::SeekToFirst() { // Position at the first entry in list. node_ = list_->head_->Next(0); } template<typename Key, class Comparator> inline void SkipList<Key,Comparator>::Iterator::SeekToLast() { // Position at the last entry in list. node_ = list_->FindLast(); // 查找最后一个节点,如果链表为空时,设置为null if (node_ == list_->head_) { node_ = NULL; } }
结论
作为一种简单的数据结构,SkipList算法非常容易实现。在大多数应用中SkipList能够代替平衡树,SkipList和进行过优化的平衡树有着同样高的性能,性能远远超过未经优化的平衡二叉树。
代码编程技巧:可以采用内部类或者友元类定义迭代器iterator;
相关文章推荐
- Leveldb源码分析--11
- leveldb源码分析四
- leveldb源码分析--SSTable之block
- LevelDB源码分析1-基础
- Leveldb源码分析--5
- Leveldb源码分析--1
- leveldb源码阅读分析笔记
- LevelDB源码分析-第一印象
- Leveldb源码分析--20
- leveldb源码分析入手
- Leveldb源码分析--15
- LevelDB源码分析之十:LOG文件
- Leveldb源码分析之Slice
- Leveldb源码分析--2
- levelDB源码分析-Memtable
- Leveldb源码分析--13
- leveldb源码分析--SSTable之Compaction 详解
- LevelDB源码分析之九:env
- LevelDB源码分析7-随机数产生
- Leveldb源码分析--3