Storage Engine API
mongo::BtreeLogic< BtreeLayout > Class Template Reference

This is the logic for manipulating the Btree. More...

#include <btree_logic.h>

Detailed Description

template<class BtreeLayout>
class mongo::BtreeLogic< BtreeLayout >

This is the logic for manipulating the Btree.

It is (mostly) independent of the on-disk format.

Classes

class  Builder
 
struct  FullKey
 This is an in memory wrapper for the variable length data associated with a KeyHeaderType. More...
 

Public Types

typedef BtreeLayout::FixedWidthKeyType KeyHeaderType
 
typedef BtreeLayout::KeyType KeyDataType
 
typedef BtreeLayout::KeyOwnedType KeyDataOwnedType
 
typedef BtreeLayout::LocType LocType
 
typedef BtreeLayout::BucketType BucketType
 

Public Member Functions

 BtreeLogic (HeadManager *head, RecordStore *store, SavedCursorRegistry *cursors, const Ordering &ordering, const std::string &indexName, bool isUnique)
 'head' manages the catalog information. More...
 
BuildernewBuilder (OperationContext *opCtx, bool dupsAllowed)
 Caller owns the returned pointer. More...
 
Status dupKeyCheck (OperationContext *opCtx, const BSONObj &key, const DiskLoc &loc) const
 
Status insert (OperationContext *opCtx, const BSONObj &rawKey, const DiskLoc &value, bool dupsAllowed)
 
bool locate (OperationContext *opCtx, const BSONObj &key, const DiskLoc &recordLoc, const int direction, int *posOut, DiskLoc *bucketLocOut) const
 Navigates down the tree and locates the bucket and position containing a record with the specified <key, recordLoc> combination. More...
 
void advance (OperationContext *opCtx, DiskLoc *bucketLocInOut, int *posInOut, int direction) const
 
bool exists (OperationContext *opCtx, const KeyDataType &key) const
 
bool unindex (OperationContext *opCtx, const BSONObj &key, const DiskLoc &recordLoc)
 
bool isEmpty (OperationContext *opCtx) const
 
long long fullValidate (OperationContext *, long long *unusedCount, bool strict, bool dumpBuckets, unsigned depth) const
 
DiskLoc getDiskLoc (OperationContext *opCtx, const DiskLoc &bucketLoc, const int keyOffset) const
 
BSONObj getKey (OperationContext *opCtx, const DiskLoc &bucketLoc, const int keyOffset) const
 
IndexKeyEntry getRandomEntry (OperationContext *opCtx) const
 Returns a pseudo-random element from the tree. More...
 
DiskLoc getHead (OperationContext *opCtx) const
 
Status touch (OperationContext *opCtx) const
 
void customLocate (OperationContext *opCtx, DiskLoc *locInOut, int *keyOfsInOut, const IndexSeekPoint &seekPoint, int direction) const
 
void advanceTo (OperationContext *, DiskLoc *thisLocInOut, int *keyOfsInOut, const IndexSeekPoint &seekPoint, int direction) const
 
void restorePosition (OperationContext *opCtx, const BSONObj &savedKey, const DiskLoc &savedLoc, int direction, DiskLoc *bucketInOut, int *keyOffsetInOut) const
 
Status initAsEmpty (OperationContext *opCtx)
 Returns OK if the index was uninitialized before, error status otherwise. More...
 
const RecordStoregetRecordStore () const
 
SavedCursorRegistrysavedCursors () const
 
Ordering ordering () const
 
int customBSONCmp (const BSONObj &inIndex_left, const IndexSeekPoint &seekPoint_right, int direction) const
 NOTE: Currently the Ordering implementation assumes a compound index will not have more keys than an unsigned variable has bits. More...
 
bool isUnique () const
 

Static Public Member Functions

static int lowWaterMark ()
 

Private Member Functions

bool basicInsert (OperationContext *opCtx, BucketType *bucket, const DiskLoc bucketLoc, int &keypos, const KeyDataType &key, const DiskLoc recordLoc)
 Durability note: More...
 
void dropFront (BucketType *bucket, int nDrop, int &refpos)
 
void _pack (OperationContext *opCtx, BucketType *bucket, const DiskLoc thisLoc, int &refPos)
 When we delete things, we just leave empty space until the node is full and then we repack it. More...
 
void customLocate (OperationContext *opCtx, DiskLoc *locInOut, int *keyOfsInOut, const IndexSeekPoint &seekPoint, int direction, std::pair< DiskLoc, int > &bestParent) const
 
Status _find (OperationContext *opCtx, BucketType *bucket, const KeyDataType &key, const DiskLoc &recordLoc, bool errorIfDup, int *keyPositionOut, bool *foundOut) const
 Find a key within this btree bucket. More...
 
bool customFind (OperationContext *opCtx, int low, int high, const IndexSeekPoint &seekPoint, int direction, DiskLoc *thisLocInOut, int *keyOfsInOut, std::pair< DiskLoc, int > &bestParent) const
 
void advanceToImpl (OperationContext *opCtx, DiskLoc *thisLocInOut, int *keyOfsInOut, const IndexSeekPoint &seekPoint, int direction) const
 find smallest/biggest value greater-equal/less-equal than specified More...
 
bool wouldCreateDup (OperationContext *opCtx, const KeyDataType &key, const DiskLoc self) const
 
bool keyIsUsed (OperationContext *opCtx, const DiskLoc &loc, const int &pos) const
 
void skipUnusedKeys (OperationContext *opCtx, DiskLoc *loc, int *pos, int direction) const
 
DiskLoc advance (OperationContext *opCtx, const DiskLoc &bucketLoc, int *posInOut, int direction) const
 
DiskLoc _locate (OperationContext *opCtx, const DiskLoc &bucketLoc, const KeyDataType &key, int *posOut, bool *foundOut, const DiskLoc &recordLoc, const int direction) const
 Recursively walk down the btree, looking for a match of key and recordLoc. More...
 
long long _fullValidate (OperationContext *opCtx, const DiskLoc bucketLoc, long long *unusedCount, bool strict, bool dumpBuckets, unsigned depth) const
 
DiskLoc _addBucket (OperationContext *opCtx)
 
bool canMergeChildren (OperationContext *opCtx, BucketType *bucket, const DiskLoc bucketLoc, const int leftIndex)
 
int _rebalancedSeparatorPos (OperationContext *opCtx, BucketType *bucket, int leftIndex)
 This implementation must respect the meaning and value of lowWaterMark. More...
 
void _packReadyForMod (BucketType *bucket, int &refPos)
 Version when write intent already declared. More...
 
void truncateTo (BucketType *bucket, int N, int &refPos)
 
void split (OperationContext *opCtx, BucketType *bucket, const DiskLoc bucketLoc, int keypos, const DiskLoc recordLoc, const KeyDataType &key, const DiskLoc lchild, const DiskLoc rchild)
 
Status _insert (OperationContext *opCtx, BucketType *bucket, const DiskLoc bucketLoc, const KeyDataType &key, const DiskLoc recordLoc, bool dupsAllowed, const DiskLoc leftChild, const DiskLoc rightChild)
 
void insertHere (OperationContext *opCtx, const DiskLoc bucketLoc, int pos, const KeyDataType &key, const DiskLoc recordLoc, const DiskLoc leftChild, const DiskLoc rightChild)
 insert a key in this bucket, splitting if necessary. More...
 
std::string dupKeyError (const KeyDataType &key) const
 
void setInternalKey (OperationContext *opCtx, BucketType *bucket, const DiskLoc bucketLoc, int keypos, const DiskLoc recordLoc, const KeyDataType &key, const DiskLoc lchild, const DiskLoc rchild)
 
void fixParentPtrs (OperationContext *trans, BucketType *bucket, const DiskLoc bucketLoc, int firstIndex=0, int lastIndex=-1)
 This can cause a lot of additional page writes when we assign buckets to different parents. More...
 
bool mayBalanceWithNeighbors (OperationContext *opCtx, BucketType *bucket, const DiskLoc bucketLoc)
 
void doBalanceChildren (OperationContext *opCtx, BucketType *bucket, const DiskLoc bucketLoc, int leftIndex)
 
void doBalanceLeftToRight (OperationContext *opCtx, BucketType *bucket, const DiskLoc thisLoc, int leftIndex, int split, BucketType *l, const DiskLoc lchild, BucketType *r, const DiskLoc rchild)
 
void doBalanceRightToLeft (OperationContext *opCtx, BucketType *bucket, const DiskLoc thisLoc, int leftIndex, int split, BucketType *l, const DiskLoc lchild, BucketType *r, const DiskLoc rchild)
 
bool tryBalanceChildren (OperationContext *opCtx, BucketType *bucket, const DiskLoc bucketLoc, int leftIndex)
 
int indexInParent (OperationContext *opCtx, BucketType *bucket, const DiskLoc bucketLoc) const
 
void doMergeChildren (OperationContext *opCtx, BucketType *bucket, const DiskLoc bucketLoc, int leftIndex)
 
void replaceWithNextChild (OperationContext *opCtx, BucketType *bucket, const DiskLoc bucketLoc)
 
void deleteInternalKey (OperationContext *opCtx, BucketType *bucket, const DiskLoc bucketLoc, int keypos)
 This function replaces the specified key (k) by either the prev or next key in the btree (k'). More...
 
void delKeyAtPos (OperationContext *opCtx, BucketType *bucket, const DiskLoc bucketLoc, int p)
 May delete the bucket 'bucket' rendering 'bucketLoc' invalid. More...
 
void delBucket (OperationContext *opCtx, BucketType *bucket, const DiskLoc bucketLoc)
 
void deallocBucket (OperationContext *opCtx, BucketType *bucket, const DiskLoc bucketLoc)
 
bool _keyIsAt (const BSONObj &savedKey, const DiskLoc &savedLoc, BucketType *bucket, int keyPos) const
 
bool pushBack (BucketType *bucket, const DiskLoc recordLoc, const KeyDataType &key, const DiskLoc prevChild)
 Tries to push key into bucket. More...
 
BucketTypechildForPos (OperationContext *opCtx, BucketType *bucket, int pos) const
 
BucketTypegetBucket (OperationContext *opCtx, const DiskLoc dl) const
 
BucketTypegetBucket (OperationContext *opCtx, const RecordId dl) const
 
BucketTypegetRoot (OperationContext *opCtx) const
 
DiskLoc getRootLoc (OperationContext *opCtx) const
 
void recordRandomWalk (OperationContext *opCtx, PseudoRandom *prng, BucketType *curBucket, int64_t nBucketsInCurrentLevel, std::vector< int64_t > *nKeysInLevel, std::vector< FullKey > *selectedKeys) const
 Does a random walk through the tree, recording information about the walk along the way. More...
 

Static Private Member Functions

static LocTypechildLocForPos (BucketType *bucket, int pos)
 
static FullKey getFullKey (const BucketType *bucket, int i)
 
static KeyHeaderTypegetKeyHeader (BucketType *bucket, int i)
 
static const KeyHeaderTypegetKeyHeader (const BucketType *bucket, int i)
 
static char * dataAt (BucketType *bucket, short ofs)
 
static void markUnused (BucketType *bucket, int keypos)
 
static int totalDataSize (BucketType *bucket)
 
static void init (BucketType *bucket)
 
static int _alloc (BucketType *bucket, int bytes)
 We allocate space from the end of the buffer for data. More...
 
static void _unalloc (BucketType *bucket, int bytes)
 
static void _delKeyAtPos (BucketType *bucket, int keypos, bool mayEmpty=false)
 
static void popBack (BucketType *bucket, DiskLoc *recordLocOut, KeyDataType *keyDataOut)
 Pull rightmost key from the bucket and set its prevChild pointer to be the nextChild for the whole bucket. More...
 
static bool mayDropKey (BucketType *bucket, int index, int refPos)
 With this implementation, refPos == 0 disregards effect of refPos. More...
 
static int _packedDataSize (BucketType *bucket, int refPos)
 
static void setPacked (BucketType *bucket)
 
static void setNotPacked (BucketType *bucket)
 
static BucketTypebtreemod (OperationContext *opCtx, BucketType *bucket)
 
static int splitPos (BucketType *bucket, int keypos)
 In the standard btree algorithm, we would split based on the existing keys and the new key. More...
 
static void reserveKeysFront (BucketType *bucket, int nAdd)
 
static void setKey (BucketType *bucket, int i, const DiskLoc recordLoc, const KeyDataType &key, const DiskLoc prevChildBucket)
 
static bool isHead (BucketType *bucket)
 
static void dumpBucket (const BucketType *bucket, int indentLength=0)
 
static void assertValid (const std::string &ns, BucketType *bucket, const Ordering &ordering, bool force=false)
 

Private Attributes

HeadManager_headManager
 
RecordStore_recordStore
 
SavedCursorRegistry_cursorRegistry
 
Ordering _ordering
 
std::string _indexName
 
const bool _isUnique
 

Friends

class BtreeLogic::Builder
 
class BtreeLogicTestBase< BtreeLayout >
 
class ArtificialTreeBuilder< BtreeLayout >
 

Member Typedef Documentation

◆ BucketType

template<class BtreeLayout>
typedef BtreeLayout::BucketType mongo::BtreeLogic< BtreeLayout >::BucketType

◆ KeyDataOwnedType

template<class BtreeLayout>
typedef BtreeLayout::KeyOwnedType mongo::BtreeLogic< BtreeLayout >::KeyDataOwnedType

◆ KeyDataType

template<class BtreeLayout>
typedef BtreeLayout::KeyType mongo::BtreeLogic< BtreeLayout >::KeyDataType

◆ KeyHeaderType

template<class BtreeLayout>
typedef BtreeLayout::FixedWidthKeyType mongo::BtreeLogic< BtreeLayout >::KeyHeaderType

◆ LocType

template<class BtreeLayout>
typedef BtreeLayout::LocType mongo::BtreeLogic< BtreeLayout >::LocType

Constructor & Destructor Documentation

◆ BtreeLogic()

template<class BtreeLayout>
mongo::BtreeLogic< BtreeLayout >::BtreeLogic ( HeadManager head,
RecordStore store,
SavedCursorRegistry cursors,
const Ordering &  ordering,
const std::string &  indexName,
bool  isUnique 
)
inline

'head' manages the catalog information.

'store' allocates and frees buckets. 'ordering' is meta-information we store in the catalog. 'indexName' is a string identifying the index that we use to print errors with.

Member Function Documentation

◆ _addBucket()

template<class BtreeLayout >
DiskLoc mongo::BtreeLogic< BtreeLayout >::_addBucket ( OperationContext *  opCtx)
private

◆ _alloc()

template<class BtreeLayout >
int mongo::BtreeLogic< BtreeLayout >::_alloc ( BucketType bucket,
int  bytes 
)
staticprivate

We allocate space from the end of the buffer for data.

The keynodes grow from the front.

◆ _delKeyAtPos()

template<class BtreeLayout >
void mongo::BtreeLogic< BtreeLayout >::_delKeyAtPos ( BucketType bucket,
int  keypos,
bool  mayEmpty = false 
)
staticprivate

◆ _find()

template<class BtreeLayout >
Status mongo::BtreeLogic< BtreeLayout >::_find ( OperationContext *  opCtx,
BucketType bucket,
const KeyDataType key,
const DiskLoc recordLoc,
bool  errorIfDup,
int *  keyPositionOut,
bool *  foundOut 
) const
private

Find a key within this btree bucket.

When duplicate keys are allowed, we use the DiskLoc of the record as if it were part of the key. That assures that even when there are many duplicates (e.g., 1 million) for a key, our performance is still good.

assertIfDup: if the key exists (ignoring the recordLoc), uassert

pos: for existing keys k0...kn-1. returns # it goes BEFORE. so key[pos-1] < key < key[pos] returns n if it goes after the last existing key. note result might be an Unused location!

◆ _fullValidate()

template<class BtreeLayout >
long long mongo::BtreeLogic< BtreeLayout >::_fullValidate ( OperationContext *  opCtx,
const DiskLoc  bucketLoc,
long long *  unusedCount,
bool  strict,
bool  dumpBuckets,
unsigned  depth 
) const
private

◆ _insert()

template<class BtreeLayout >
Status mongo::BtreeLogic< BtreeLayout >::_insert ( OperationContext *  opCtx,
BucketType bucket,
const DiskLoc  bucketLoc,
const KeyDataType key,
const DiskLoc  recordLoc,
bool  dupsAllowed,
const DiskLoc  leftChild,
const DiskLoc  rightChild 
)
private

◆ _keyIsAt()

template<class BtreeLayout >
bool mongo::BtreeLogic< BtreeLayout >::_keyIsAt ( const BSONObj &  savedKey,
const DiskLoc savedLoc,
BucketType bucket,
int  keyPos 
) const
private

◆ _locate()

template<class BtreeLayout >
DiskLoc mongo::BtreeLogic< BtreeLayout >::_locate ( OperationContext *  opCtx,
const DiskLoc bucketLoc,
const KeyDataType key,
int *  posOut,
bool *  foundOut,
const DiskLoc recordLoc,
const int  direction 
) const
private

Recursively walk down the btree, looking for a match of key and recordLoc.

Caller should have acquired lock on bucketLoc.

◆ _pack()

template<class BtreeLayout >
void mongo::BtreeLogic< BtreeLayout >::_pack ( OperationContext *  opCtx,
BucketType bucket,
const DiskLoc  thisLoc,
int &  refPos 
)
private

When we delete things, we just leave empty space until the node is full and then we repack it.

◆ _packedDataSize()

template<class BtreeLayout >
int mongo::BtreeLogic< BtreeLayout >::_packedDataSize ( BucketType bucket,
int  refPos 
)
staticprivate

◆ _packReadyForMod()

template<class BtreeLayout >
void mongo::BtreeLogic< BtreeLayout >::_packReadyForMod ( BucketType bucket,
int &  refPos 
)
private

Version when write intent already declared.

◆ _rebalancedSeparatorPos()

template<class BtreeLayout >
int mongo::BtreeLogic< BtreeLayout >::_rebalancedSeparatorPos ( OperationContext *  opCtx,
BucketType bucket,
int  leftIndex 
)
private

This implementation must respect the meaning and value of lowWaterMark.

Also see comments in splitPos().

◆ _unalloc()

template<class BtreeLayout >
void mongo::BtreeLogic< BtreeLayout >::_unalloc ( BucketType bucket,
int  bytes 
)
staticprivate

◆ advance() [1/2]

template<class BtreeLayout >
void mongo::BtreeLogic< BtreeLayout >::advance ( OperationContext *  opCtx,
DiskLoc bucketLocInOut,
int *  posInOut,
int  direction 
) const

◆ advance() [2/2]

template<class BtreeLayout >
DiskLoc mongo::BtreeLogic< BtreeLayout >::advance ( OperationContext *  opCtx,
const DiskLoc bucketLoc,
int *  posInOut,
int  direction 
) const
private

◆ advanceTo()

template<class BtreeLayout >
void mongo::BtreeLogic< BtreeLayout >::advanceTo ( OperationContext *  opCtx,
DiskLoc thisLocInOut,
int *  keyOfsInOut,
const IndexSeekPoint seekPoint,
int  direction 
) const

◆ advanceToImpl()

template<class BtreeLayout >
void mongo::BtreeLogic< BtreeLayout >::advanceToImpl ( OperationContext *  opCtx,
DiskLoc thisLocInOut,
int *  keyOfsInOut,
const IndexSeekPoint seekPoint,
int  direction 
) const
private

find smallest/biggest value greater-equal/less-equal than specified

starting thisLoc + keyOfs will be strictly less than/strictly greater than keyBegin/keyBeginLen/keyEnd

All the direction checks below allowed me to refactor the code, but possibly separate forward and reverse implementations would be more efficient

◆ assertValid()

template<class BtreeLayout >
void mongo::BtreeLogic< BtreeLayout >::assertValid ( const std::string &  ns,
BucketType bucket,
const Ordering &  ordering,
bool  force = false 
)
staticprivate

◆ basicInsert()

template<class BtreeLayout >
bool mongo::BtreeLogic< BtreeLayout >::basicInsert ( OperationContext *  opCtx,
BucketType bucket,
const DiskLoc  bucketLoc,
int &  keypos,
const KeyDataType key,
const DiskLoc  recordLoc 
)
private

Durability note:

We do separate intent declarations herein. Arguably one could just declare the whole bucket given we do group commits. This is something we could investigate later as to what is faster. Insert a key in a bucket with no complexity – no splits required Returns false if a split is required.

◆ btreemod()

template<class BtreeLayout >
BtreeLogic< BtreeLayout >::BucketType * mongo::BtreeLogic< BtreeLayout >::btreemod ( OperationContext *  opCtx,
BucketType bucket 
)
staticprivate

◆ canMergeChildren()

template<class BtreeLayout >
bool mongo::BtreeLogic< BtreeLayout >::canMergeChildren ( OperationContext *  opCtx,
BucketType bucket,
const DiskLoc  bucketLoc,
const int  leftIndex 
)
private

◆ childForPos()

template<class BtreeLayout >
BtreeLogic< BtreeLayout >::BucketType * mongo::BtreeLogic< BtreeLayout >::childForPos ( OperationContext *  opCtx,
BucketType bucket,
int  pos 
) const
private

◆ childLocForPos()

template<class BtreeLayout >
BtreeLogic< BtreeLayout >::LocType & mongo::BtreeLogic< BtreeLayout >::childLocForPos ( BucketType bucket,
int  pos 
)
staticprivate

◆ customBSONCmp()

template<class BtreeLayout >
int mongo::BtreeLogic< BtreeLayout >::customBSONCmp ( const BSONObj &  left,
const IndexSeekPoint right,
int  direction 
) const

NOTE: Currently the Ordering implementation assumes a compound index will not have more keys than an unsigned variable has bits.

The same assumption is used in the implementation below with respect to the 'mask' variable.

'l' is a regular bsonobj

'rBegin' is composed partly of an existing bsonobj, and the remaining keys are taken from a vector of elements that frequently changes

see https://jira.mongodb.org/browse/SERVER-371

◆ customFind()

template<class BtreeLayout>
bool mongo::BtreeLogic< BtreeLayout >::customFind ( OperationContext *  opCtx,
int  low,
int  high,
const IndexSeekPoint seekPoint,
int  direction,
DiskLoc thisLocInOut,
int *  keyOfsInOut,
std::pair< DiskLoc, int > &  bestParent 
) const
private

◆ customLocate() [1/2]

template<class BtreeLayout >
void mongo::BtreeLogic< BtreeLayout >::customLocate ( OperationContext *  opCtx,
DiskLoc locInOut,
int *  keyOfsInOut,
const IndexSeekPoint seekPoint,
int  direction 
) const

◆ customLocate() [2/2]

template<class BtreeLayout>
void mongo::BtreeLogic< BtreeLayout >::customLocate ( OperationContext *  opCtx,
DiskLoc locInOut,
int *  keyOfsInOut,
const IndexSeekPoint seekPoint,
int  direction,
std::pair< DiskLoc, int > &  bestParent 
) const
private

◆ dataAt()

template<class BtreeLayout >
char * mongo::BtreeLogic< BtreeLayout >::dataAt ( BucketType bucket,
short  ofs 
)
staticprivate

◆ deallocBucket()

template<class BtreeLayout >
void mongo::BtreeLogic< BtreeLayout >::deallocBucket ( OperationContext *  opCtx,
BucketType bucket,
const DiskLoc  bucketLoc 
)
private

◆ delBucket()

template<class BtreeLayout >
void mongo::BtreeLogic< BtreeLayout >::delBucket ( OperationContext *  opCtx,
BucketType bucket,
const DiskLoc  bucketLoc 
)
private

◆ deleteInternalKey()

template<class BtreeLayout >
void mongo::BtreeLogic< BtreeLayout >::deleteInternalKey ( OperationContext *  opCtx,
BucketType bucket,
const DiskLoc  bucketLoc,
int  keypos 
)
private

This function replaces the specified key (k) by either the prev or next key in the btree (k').

We require that k have either a left or right child. If k has a left child, we set k' to the prev key of k, which must be a leaf present in the left child. If k does not have a left child, we set k' to the next key of k, which must be a leaf present in the right child. When we replace k with k', we copy k' over k (which may cause a split) and then remove k' from its original location. Because k' is stored in a descendent of k, replacing k by k' will not modify the storage location of the original k', and we can easily remove k' from its original location.

This function is only needed in cases where k has a left or right child; in other cases a simpler key removal implementation is possible.

NOTE on noncompliant BtreeBuilder btrees: It is possible (though likely rare) for btrees created by BtreeBuilder to have k' that is not a leaf, see SERVER-2732. These cases are handled in the same manner as described in the "legacy btree structures" note below.

NOTE on legacy btree structures: In legacy btrees, k' can be a nonleaf. In such a case we 'delete' k by marking it as an unused node rather than replacing it with k'. Also, k' may be a leaf but marked as an unused node. In such a case we replace k by k', preserving the key's unused marking. This function is only expected to mark a key as unused when handling a legacy btree.

◆ delKeyAtPos()

template<class BtreeLayout >
void mongo::BtreeLogic< BtreeLayout >::delKeyAtPos ( OperationContext *  opCtx,
BucketType bucket,
const DiskLoc  bucketLoc,
int  p 
)
private

May delete the bucket 'bucket' rendering 'bucketLoc' invalid.

◆ doBalanceChildren()

template<class BtreeLayout >
void mongo::BtreeLogic< BtreeLayout >::doBalanceChildren ( OperationContext *  opCtx,
BucketType bucket,
const DiskLoc  bucketLoc,
int  leftIndex 
)
private

◆ doBalanceLeftToRight()

template<class BtreeLayout >
void mongo::BtreeLogic< BtreeLayout >::doBalanceLeftToRight ( OperationContext *  opCtx,
BucketType bucket,
const DiskLoc  thisLoc,
int  leftIndex,
int  split,
BucketType l,
const DiskLoc  lchild,
BucketType r,
const DiskLoc  rchild 
)
private

◆ doBalanceRightToLeft()

template<class BtreeLayout >
void mongo::BtreeLogic< BtreeLayout >::doBalanceRightToLeft ( OperationContext *  opCtx,
BucketType bucket,
const DiskLoc  thisLoc,
int  leftIndex,
int  split,
BucketType l,
const DiskLoc  lchild,
BucketType r,
const DiskLoc  rchild 
)
private

◆ doMergeChildren()

template<class BtreeLayout >
void mongo::BtreeLogic< BtreeLayout >::doMergeChildren ( OperationContext *  opCtx,
BucketType bucket,
const DiskLoc  bucketLoc,
int  leftIndex 
)
private

◆ dropFront()

template<class BtreeLayout >
void mongo::BtreeLogic< BtreeLayout >::dropFront ( BucketType bucket,
int  nDrop,
int &  refpos 
)
private

◆ dumpBucket()

template<class BtreeLayout >
void mongo::BtreeLogic< BtreeLayout >::dumpBucket ( const BucketType bucket,
int  indentLength = 0 
)
staticprivate

◆ dupKeyCheck()

template<class BtreeLayout >
Status mongo::BtreeLogic< BtreeLayout >::dupKeyCheck ( OperationContext *  opCtx,
const BSONObj &  key,
const DiskLoc loc 
) const

◆ dupKeyError()

template<class BtreeLayout >
string mongo::BtreeLogic< BtreeLayout >::dupKeyError ( const KeyDataType key) const
private

◆ exists()

template<class BtreeLayout >
bool mongo::BtreeLogic< BtreeLayout >::exists ( OperationContext *  opCtx,
const KeyDataType key 
) const

◆ fixParentPtrs()

template<class BtreeLayout >
void mongo::BtreeLogic< BtreeLayout >::fixParentPtrs ( OperationContext *  opCtx,
BucketType bucket,
const DiskLoc  bucketLoc,
int  firstIndex = 0,
int  lastIndex = -1 
)
private

This can cause a lot of additional page writes when we assign buckets to different parents.

Maybe get rid of parent ptrs?

◆ fullValidate()

template<class BtreeLayout >
long long mongo::BtreeLogic< BtreeLayout >::fullValidate ( OperationContext *  opCtx,
long long *  unusedCount,
bool  strict,
bool  dumpBuckets,
unsigned  depth 
) const

◆ getBucket() [1/2]

template<class BtreeLayout>
BucketType* mongo::BtreeLogic< BtreeLayout >::getBucket ( OperationContext *  opCtx,
const DiskLoc  dl 
) const
inlineprivate

◆ getBucket() [2/2]

template<class BtreeLayout >
BtreeLogic< BtreeLayout >::BucketType * mongo::BtreeLogic< BtreeLayout >::getBucket ( OperationContext *  opCtx,
const RecordId  dl 
) const
private

◆ getDiskLoc()

template<class BtreeLayout >
DiskLoc mongo::BtreeLogic< BtreeLayout >::getDiskLoc ( OperationContext *  opCtx,
const DiskLoc bucketLoc,
const int  keyOffset 
) const

◆ getFullKey()

template<class BtreeLayout >
BtreeLogic< BtreeLayout >::FullKey mongo::BtreeLogic< BtreeLayout >::getFullKey ( const BucketType bucket,
int  i 
)
staticprivate

◆ getHead()

template<class BtreeLayout>
DiskLoc mongo::BtreeLogic< BtreeLayout >::getHead ( OperationContext *  opCtx) const
inline

◆ getKey()

template<class BtreeLayout >
BSONObj mongo::BtreeLogic< BtreeLayout >::getKey ( OperationContext *  opCtx,
const DiskLoc bucketLoc,
const int  keyOffset 
) const

◆ getKeyHeader() [1/2]

template<class BtreeLayout >
BtreeLogic< BtreeLayout >::KeyHeaderType & mongo::BtreeLogic< BtreeLayout >::getKeyHeader ( BucketType bucket,
int  i 
)
staticprivate

◆ getKeyHeader() [2/2]

template<class BtreeLayout >
const BtreeLogic< BtreeLayout >::KeyHeaderType & mongo::BtreeLogic< BtreeLayout >::getKeyHeader ( const BucketType bucket,
int  i 
)
staticprivate

◆ getRandomEntry()

template<class BtreeLayout >
IndexKeyEntry mongo::BtreeLogic< BtreeLayout >::getRandomEntry ( OperationContext *  opCtx) const

Returns a pseudo-random element from the tree.

It is an error to call this method if the tree is empty.

◆ getRecordStore()

template<class BtreeLayout>
const RecordStore* mongo::BtreeLogic< BtreeLayout >::getRecordStore ( ) const
inline

◆ getRoot()

template<class BtreeLayout >
BtreeLogic< BtreeLayout >::BucketType * mongo::BtreeLogic< BtreeLayout >::getRoot ( OperationContext *  opCtx) const
private

◆ getRootLoc()

template<class BtreeLayout >
DiskLoc mongo::BtreeLogic< BtreeLayout >::getRootLoc ( OperationContext *  opCtx) const
private

◆ indexInParent()

template<class BtreeLayout >
int mongo::BtreeLogic< BtreeLayout >::indexInParent ( OperationContext *  opCtx,
BucketType bucket,
const DiskLoc  bucketLoc 
) const
private

◆ init()

template<class BtreeLayout >
void mongo::BtreeLogic< BtreeLayout >::init ( BucketType bucket)
staticprivate

◆ initAsEmpty()

template<class BtreeLayout >
Status mongo::BtreeLogic< BtreeLayout >::initAsEmpty ( OperationContext *  opCtx)

Returns OK if the index was uninitialized before, error status otherwise.

◆ insert()

template<class BtreeLayout >
Status mongo::BtreeLogic< BtreeLayout >::insert ( OperationContext *  opCtx,
const BSONObj &  rawKey,
const DiskLoc value,
bool  dupsAllowed 
)

◆ insertHere()

template<class BtreeLayout >
void mongo::BtreeLogic< BtreeLayout >::insertHere ( OperationContext *  opCtx,
const DiskLoc  bucketLoc,
int  pos,
const KeyDataType key,
const DiskLoc  recordLoc,
const DiskLoc  leftChildLoc,
const DiskLoc  rightChildLoc 
)
private

insert a key in this bucket, splitting if necessary.

- where to insert the key in range 0..n. 0=make leftmost, n=make rightmost. NOTE this function may free some data, and as a result the value passed for keypos may be invalid after calling insertHere()

Some of the write intent signaling below relies on the implementation of the optimized write intent code in basicInsert().

◆ isEmpty()

template<class BtreeLayout >
bool mongo::BtreeLogic< BtreeLayout >::isEmpty ( OperationContext *  opCtx) const

◆ isHead()

template<class BtreeLayout >
bool mongo::BtreeLogic< BtreeLayout >::isHead ( BucketType bucket)
staticprivate

◆ isUnique()

template<class BtreeLayout>
bool mongo::BtreeLogic< BtreeLayout >::isUnique ( ) const
inline

◆ keyIsUsed()

template<class BtreeLayout >
bool mongo::BtreeLogic< BtreeLayout >::keyIsUsed ( OperationContext *  opCtx,
const DiskLoc loc,
const int &  pos 
) const
private

◆ locate()

template<class BtreeLayout >
bool mongo::BtreeLogic< BtreeLayout >::locate ( OperationContext *  opCtx,
const BSONObj &  key,
const DiskLoc recordLoc,
const int  direction,
int *  posOut,
DiskLoc bucketLocOut 
) const

Navigates down the tree and locates the bucket and position containing a record with the specified <key, recordLoc> combination.

Returns
true if the exact <key, recordLoc> was found. Otherwise, false and the bucketLocOut would contain the bucket containing key which is before or after the searched one (dependent on the direction).

◆ lowWaterMark()

template<class BtreeLayout >
int mongo::BtreeLogic< BtreeLayout >::lowWaterMark ( )
static

◆ markUnused()

template<class BtreeLayout >
void mongo::BtreeLogic< BtreeLayout >::markUnused ( BucketType bucket,
int  keypos 
)
staticprivate

◆ mayBalanceWithNeighbors()

template<class BtreeLayout >
bool mongo::BtreeLogic< BtreeLayout >::mayBalanceWithNeighbors ( OperationContext *  opCtx,
BucketType bucket,
const DiskLoc  bucketLoc 
)
private

◆ mayDropKey()

template<class BtreeLayout >
bool mongo::BtreeLogic< BtreeLayout >::mayDropKey ( BucketType bucket,
int  index,
int  refPos 
)
staticprivate

With this implementation, refPos == 0 disregards effect of refPos.

index > 0 prevents creation of an empty bucket.

◆ newBuilder()

template<class BtreeLayout >
BtreeLogic< BtreeLayout >::Builder * mongo::BtreeLogic< BtreeLayout >::newBuilder ( OperationContext *  opCtx,
bool  dupsAllowed 
)

Caller owns the returned pointer.

'this' must outlive the returned pointer.

◆ ordering()

template<class BtreeLayout>
Ordering mongo::BtreeLogic< BtreeLayout >::ordering ( ) const
inline

◆ popBack()

template<class BtreeLayout >
void mongo::BtreeLogic< BtreeLayout >::popBack ( BucketType bucket,
DiskLoc recordLocOut,
KeyDataType keyDataOut 
)
staticprivate

Pull rightmost key from the bucket and set its prevChild pointer to be the nextChild for the whole bucket.

It is assumed that caller already has the old value of the nextChild pointer and is about to add a pointer to it elsewhere in the tree.

This is only used by BtreeLogic::Builder. Think very hard (and change this comment) before using it anywhere else.

WARNING: The keyDataOut that is filled out by this function points to newly unalloced memory inside of this bucket. It only remains valid until the next write to this bucket.

◆ pushBack()

template<class BtreeLayout >
bool mongo::BtreeLogic< BtreeLayout >::pushBack ( BucketType bucket,
const DiskLoc  recordLoc,
const KeyDataType key,
const DiskLoc  prevChild 
)
private

Tries to push key into bucket.

Add a key.

Return false if it can't because key doesn't fit.

bucket must be declared as writable by the caller. The new key/recordLoc pair must be higher than any others in bucket.

TODO needs 'this' for _ordering for sanity check

Must be > all existing. Be careful to set next ptr right.

◆ recordRandomWalk()

template<class BtreeLayout>
void mongo::BtreeLogic< BtreeLayout >::recordRandomWalk ( OperationContext *  opCtx,
PseudoRandom *  prng,
BucketType curBucket,
int64_t  nBucketsInCurrentLevel,
std::vector< int64_t > *  nKeysInLevel,
std::vector< FullKey > *  selectedKeys 
) const
private

Does a random walk through the tree, recording information about the walk along the way.

'nKeysInLevel' will be filled in such that 'nKeysInLevel[i]' is an approximation of the number of keys in the ith level of the B-tree.

'selectedKeys' will be filled in such that 'selectedKeys[i]' will be a pseudo-random key selected from the bucket we went through on the ith level of the B-tree.

◆ replaceWithNextChild()

template<class BtreeLayout >
void mongo::BtreeLogic< BtreeLayout >::replaceWithNextChild ( OperationContext *  opCtx,
BucketType bucket,
const DiskLoc  bucketLoc 
)
private

◆ reserveKeysFront()

template<class BtreeLayout >
void mongo::BtreeLogic< BtreeLayout >::reserveKeysFront ( BucketType bucket,
int  nAdd 
)
staticprivate

◆ restorePosition()

template<class BtreeLayout >
void mongo::BtreeLogic< BtreeLayout >::restorePosition ( OperationContext *  opCtx,
const BSONObj &  savedKey,
const DiskLoc savedLoc,
int  direction,
DiskLoc bucketInOut,
int *  keyOffsetInOut 
) const

◆ savedCursors()

template<class BtreeLayout>
SavedCursorRegistry* mongo::BtreeLogic< BtreeLayout >::savedCursors ( ) const
inline

◆ setInternalKey()

template<class BtreeLayout >
void mongo::BtreeLogic< BtreeLayout >::setInternalKey ( OperationContext *  opCtx,
BucketType bucket,
const DiskLoc  bucketLoc,
int  keypos,
const DiskLoc  recordLoc,
const KeyDataType key,
const DiskLoc  lchild,
const DiskLoc  rchild 
)
private

◆ setKey()

template<class BtreeLayout >
void mongo::BtreeLogic< BtreeLayout >::setKey ( BucketType bucket,
int  i,
const DiskLoc  recordLoc,
const KeyDataType key,
const DiskLoc  prevChildBucket 
)
staticprivate

◆ setNotPacked()

template<class BtreeLayout >
void mongo::BtreeLogic< BtreeLayout >::setNotPacked ( BucketType bucket)
staticprivate

◆ setPacked()

template<class BtreeLayout >
void mongo::BtreeLogic< BtreeLayout >::setPacked ( BucketType bucket)
staticprivate

◆ skipUnusedKeys()

template<class BtreeLayout >
void mongo::BtreeLogic< BtreeLayout >::skipUnusedKeys ( OperationContext *  opCtx,
DiskLoc loc,
int *  pos,
int  direction 
) const
private

◆ split()

template<class BtreeLayout >
void mongo::BtreeLogic< BtreeLayout >::split ( OperationContext *  opCtx,
BucketType bucket,
const DiskLoc  bucketLoc,
int  keypos,
const DiskLoc  recordLoc,
const KeyDataType key,
const DiskLoc  lchild,
const DiskLoc  rchild 
)
private

◆ splitPos()

template<class BtreeLayout >
int mongo::BtreeLogic< BtreeLayout >::splitPos ( BucketType bucket,
int  keypos 
)
staticprivate

In the standard btree algorithm, we would split based on the existing keys and the new key.

But that's more work to implement, so we split the existing keys and then add the new key.

There are several published heuristic algorithms for doing splits, but basically what you want are (1) even balancing between the two sides and (2) a small split key so the parent can have a larger branching factor.

We just have a simple algorithm right now: if a key includes the halfway point (or 10% way point) in terms of bytes, split on that key; otherwise split on the key immediately to the left of the halfway point (or 10% point).

This function is expected to be called on a packed bucket.

◆ totalDataSize()

template<class BtreeLayout >
int mongo::BtreeLogic< BtreeLayout >::totalDataSize ( BucketType bucket)
staticprivate

◆ touch()

template<class BtreeLayout >
Status mongo::BtreeLogic< BtreeLayout >::touch ( OperationContext *  opCtx) const

◆ truncateTo()

template<class BtreeLayout >
void mongo::BtreeLogic< BtreeLayout >::truncateTo ( BucketType bucket,
int  N,
int &  refPos 
)
private

◆ tryBalanceChildren()

template<class BtreeLayout >
bool mongo::BtreeLogic< BtreeLayout >::tryBalanceChildren ( OperationContext *  opCtx,
BucketType bucket,
const DiskLoc  bucketLoc,
int  leftIndex 
)
private

◆ unindex()

template<class BtreeLayout >
bool mongo::BtreeLogic< BtreeLayout >::unindex ( OperationContext *  opCtx,
const BSONObj &  key,
const DiskLoc recordLoc 
)

◆ wouldCreateDup()

template<class BtreeLayout >
bool mongo::BtreeLogic< BtreeLayout >::wouldCreateDup ( OperationContext *  opCtx,
const KeyDataType key,
const DiskLoc  self 
) const
private

Friends And Related Function Documentation

◆ ArtificialTreeBuilder< BtreeLayout >

template<class BtreeLayout>
friend class ArtificialTreeBuilder< BtreeLayout >
friend

◆ BtreeLogic::Builder

template<class BtreeLayout>
friend class BtreeLogic::Builder
friend

◆ BtreeLogicTestBase< BtreeLayout >

template<class BtreeLayout>
friend class BtreeLogicTestBase< BtreeLayout >
friend

Member Data Documentation

◆ _cursorRegistry

template<class BtreeLayout>
SavedCursorRegistry* mongo::BtreeLogic< BtreeLayout >::_cursorRegistry
private

◆ _headManager

template<class BtreeLayout>
HeadManager* mongo::BtreeLogic< BtreeLayout >::_headManager
private

◆ _indexName

template<class BtreeLayout>
std::string mongo::BtreeLogic< BtreeLayout >::_indexName
private

◆ _isUnique

template<class BtreeLayout>
const bool mongo::BtreeLogic< BtreeLayout >::_isUnique
private

◆ _ordering

template<class BtreeLayout>
Ordering mongo::BtreeLogic< BtreeLayout >::_ordering
private

◆ _recordStore

template<class BtreeLayout>
RecordStore* mongo::BtreeLogic< BtreeLayout >::_recordStore
private

The documentation for this class was generated from the following files: