Storage Engine API
key_string.h
Go to the documentation of this file.
1 // key_string.h
2 
31 #pragma once
32 
33 #include <limits>
34 
35 #include "mongo/base/static_assert.h"
36 #include "mongo/bson/bsonmisc.h"
37 #include "mongo/bson/bsonobj.h"
38 #include "mongo/bson/bsonobjbuilder.h"
39 #include "mongo/bson/ordering.h"
40 #include "mongo/bson/timestamp.h"
41 #include "mongo/db/record_id.h"
42 #include "mongo/platform/decimal128.h"
43 #include "mongo/util/assert_util.h"
44 
45 namespace mongo {
46 
47 class KeyString {
48 public:
52  enum class Version : uint8_t { V0 = 0, V1 = 1 };
53  static StringData versionToString(Version version) {
54  return version == Version::V0 ? "V0" : "V1";
55  }
56 
61 
67  class TypeBits {
68  public:
69  // Sufficient bytes to encode extra type information for any BSON key that fits in 1KB.
70  // The encoding format will need to change if we raise this limit.
71  static const uint8_t kMaxBytesNeeded = 127;
72  static const uint32_t kMaxKeyBytes = 1024;
73  static const uint32_t kMaxTypeBitsPerDecimal = 17;
74  static const uint32_t kBytesForTypeAndEmptyKey = 2;
75  static const uint32_t kMaxDecimalsPerKey =
76  kMaxKeyBytes / (sizeof(Decimal128::Value) + kBytesForTypeAndEmptyKey);
77  MONGO_STATIC_ASSERT_MSG(
78  kMaxTypeBitsPerDecimal* kMaxDecimalsPerKey < kMaxBytesNeeded * 8UL,
79  "encoding needs change to contain all type bits for worst case key");
80  static const uint8_t kStoredDecimalExponentBits = 6;
81  static const uint32_t kStoredDecimalExponentMask = (1U << kStoredDecimalExponentBits) - 1;
82 
83  explicit TypeBits(Version version) : version(version) {
84  reset();
85  }
86 
91  void resetFromBuffer(BufReader* reader);
92  static TypeBits fromBuffer(Version version, BufReader* reader) {
93  TypeBits out(version);
94  out.resetFromBuffer(reader);
95  return out;
96  }
97 
101  bool isAllZeros() const {
102  return _isAllZeros;
103  }
104 
128  const uint8_t* getBuffer() const {
129  return getSize() == 1 ? _buf + 1 : _buf;
130  }
131  size_t getSize() const {
132  if (_isAllZeros) { // Case 2
133  dassert(_buf[1] == 0);
134  return 1;
135  }
136 
137  uint8_t rawSize = getSizeByte();
138  dassert(rawSize >= 1); // 0 should be handled as isAllZeros.
139  if (rawSize == 1 && !(_buf[1] & 0x80)) { // Case 3
140  return 1;
141  }
142 
143  return rawSize + 1; // Case 1
144  }
145 
146  //
147  // Everything below is only for use by KeyString.
148  //
149 
150  // Note: No space is used if all bits are 0 so the most common cases should be 0x0.
151  static const uint8_t kString = 0x0;
152  static const uint8_t kSymbol = 0x1;
153 
154  static const uint8_t kInt = 0x0;
155  static const uint8_t kLong = 0x1;
156  static const uint8_t kDouble = 0x2;
157  static const uint8_t kDecimal = 0x3; // indicates 6 more bits of typeinfo follow.
158  static const uint8_t kSpecialZeroPrefix = 0x3; // kNumericZero case, 3 more bits follow.
159  static const uint8_t kNegativeDoubleZero = 0x3; // normalized -0.0 double, either V0 or V1.
160  static const uint8_t kV0NegativeDoubleZero = 0x3; // legacy encoding for V0
161 
162  // The following describe the initial 5 type bits for kNegativeOrDecimalZero. These bits
163  // encode double -0 or a 3-bit prefix (range 0 to 5) of the 15-bit decimal zero type.
164  static const uint8_t kV1NegativeDoubleZero = 0x18; // 0b11000
165 
166  static const uint8_t kUnusedEncoding = 0x19; // 0b11001
167 
168  // There are 6 * (1<<12) == 2 * (kMaxBiasedExponent + 1) == 24576 decimal zeros.
169  static const uint8_t kDecimalZero0xxx = 0x1a; // 0b11010 12 more exponent bits follow
170  static const uint8_t kDecimalZero1xxx = 0x1b; // 0b11011
171  static const uint8_t kDecimalZero2xxx = 0x1c; // 0b11100
172  static const uint8_t kDecimalZero3xxx = 0x1d; // 0b11101
173  static const uint8_t kDecimalZero4xxx = 0x1e; // 0b11110
174  static const uint8_t kDecimalZero5xxx = 0x1f; // 0b11111
175 
176  void reset() {
177  _curBit = 0;
178  _isAllZeros = true;
179  setSizeByte(0);
180  _buf[1] = 0;
181  }
182 
183  void appendString() {
184  appendBit(kString);
185  }
186  void appendSymbol() {
187  appendBit(kSymbol);
188  }
189 
191  appendBit(kDouble >> 1);
192  appendBit(kDouble & 1);
193  }
195  appendBit(kInt >> 1);
196  appendBit(kInt & 1);
197  }
199  appendBit(kLong >> 1);
200  appendBit(kLong & 1);
201  }
203  appendBit(kDecimal >> 1);
204  appendBit(kDecimal & 1);
205  }
206  void appendZero(uint8_t zeroType);
207  void appendDecimalZero(uint32_t whichZero);
208  void appendDecimalExponent(uint8_t storedExponentBits);
209 
210  class Reader {
211  public:
215  explicit Reader(const TypeBits& typeBits) : _curBit(0), _typeBits(typeBits) {}
216 
217  uint8_t readStringLike() {
218  return readBit();
219  }
220  uint8_t readNumeric() {
221  uint8_t highBit = readBit();
222  return (highBit << 1) | readBit();
223  }
224  uint8_t readZero();
225 
226  // Given a decimal zero type between kDecimalZero0xxx and kDecimal5xxx, read the
227  // remaining 12 bits and return which of the 24576 decimal zeros to produce.
228  uint32_t readDecimalZero(uint8_t zeroType);
229 
230  // Reads the stored exponent bits of a non-zero decimal number.
231  uint8_t readDecimalExponent();
232 
233  private:
234  uint8_t readBit();
235 
236  size_t _curBit;
238  };
239 
241 
242  private:
246  uint8_t getSizeByte() const {
247  return _buf[0] & 0x7f;
248  }
249  void setSizeByte(uint8_t size) {
250  // This error can only occur in cases where the key is not only too long, but also
251  // has too many fields requiring type bits.
252  uassert(ErrorCodes::KeyTooLong, "The key is too long", size < kMaxBytesNeeded);
253  _buf[0] = 0x80 | size;
254  }
255 
256  void appendBit(uint8_t oneOrZero);
257 
258  size_t _curBit;
260 
261  // See getBuffer()/getSize() documentation for a description of how data is encoded.
262  // Currently whole buffer is copied in default copy methods. If they ever show up as hot
263  // in profiling, we should add copy operations that only copy the parts of _buf that are
264  // in use.
265  uint8_t _buf[1 /*size*/ + kMaxBytesNeeded];
266  };
267 
269  kInclusive, // Anything to be stored in an index must use this.
272  };
273 
282  };
283 
284  explicit KeyString(Version version) : version(version), _typeBits(version) {}
285 
286  KeyString(Version version, const BSONObj& obj, Ordering ord, RecordId recordId)
287  : KeyString(version) {
288  resetToKey(obj, ord, recordId);
289  }
290 
292  const BSONObj& obj,
293  Ordering ord,
294  Discriminator discriminator = kInclusive)
295  : KeyString(version) {
296  resetToKey(obj, ord, discriminator);
297  }
298 
299  KeyString(Version version, RecordId rid) : version(version), _typeBits(version) {
300  appendRecordId(rid);
301  }
302 
303  static size_t getKeySize(const char* buffer,
304  size_t len,
305  Ordering ord,
306  const TypeBits& typeBits);
307  static BSONObj toBson(StringData data, Ordering ord, const TypeBits& types);
315  static BSONObj toBson(const char* buffer,
316  size_t len,
317  Ordering ord,
318  const TypeBits& types) noexcept;
319  static BSONObj toBsonSafe(const char* buffer, size_t len, Ordering ord, const TypeBits& types);
320 
324  static RecordId decodeRecordIdAtEnd(const void* buf, size_t size);
325 
329  static RecordId decodeRecordId(BufReader* reader);
330 
331  void appendRecordId(RecordId loc);
332  void appendTypeBits(const TypeBits& bits);
333 
338  void resetToEmpty() {
339  _buffer.reset();
340  _typeBits.reset();
341  }
342 
343  void resetToKey(const BSONObj& obj, Ordering ord, RecordId recordId);
344  void resetToKey(const BSONObj& obj, Ordering ord, Discriminator discriminator = kInclusive);
345  void resetFromBuffer(const void* buffer, size_t size) {
346  _buffer.reset();
347  memcpy(_buffer.skip(size), buffer, size);
348  }
349 
350  const char* getBuffer() const {
351  return _buffer.buf();
352  }
353  size_t getSize() const {
354  return _buffer.len();
355  }
356  bool isEmpty() const {
357  return _buffer.len() == 0;
358  }
359 
360  const TypeBits& getTypeBits() const {
361  return _typeBits;
362  }
363 
364  int compare(const KeyString& other) const;
365 
369  std::string toString() const;
370 
376 
377 private:
378  void _appendAllElementsForIndexing(const BSONObj& obj,
379  Ordering ord,
380  Discriminator discriminator);
381 
382  void _appendBool(bool val, bool invert);
383  void _appendDate(Date_t val, bool invert);
384  void _appendTimestamp(Timestamp val, bool invert);
385  void _appendOID(OID val, bool invert);
386  void _appendString(StringData val, bool invert);
387  void _appendSymbol(StringData val, bool invert);
388  void _appendCode(StringData val, bool invert);
389  void _appendCodeWString(const BSONCodeWScope& val, bool invert);
390  void _appendBinData(const BSONBinData& val, bool invert);
391  void _appendRegex(const BSONRegEx& val, bool invert);
392  void _appendDBRef(const BSONDBRef& val, bool invert);
393  void _appendArray(const BSONArray& val, bool invert);
394  void _appendObject(const BSONObj& val, bool invert);
395  void _appendNumberDouble(const double num, bool invert);
396  void _appendNumberLong(const long long num, bool invert);
397  void _appendNumberInt(const int num, bool invert);
398  void _appendNumberDecimal(const Decimal128 num, bool invert);
399 
405  void _appendBsonValue(const BSONElement& elem, bool invert, const StringData* name);
406 
407  void _appendStringLike(StringData str, bool invert);
408  void _appendBson(const BSONObj& obj, bool invert);
409  void _appendSmallDouble(double value, DecimalContinuationMarker dcm, bool invert);
410  void _appendLargeDouble(double value, DecimalContinuationMarker dcm, bool invert);
411  void _appendInteger(const long long num, bool invert);
412  void _appendPreshiftedIntegerPortion(uint64_t value, bool isNegative, bool invert);
413 
414  void _appendDoubleWithoutTypeBits(const double num, DecimalContinuationMarker dcm, bool invert);
415  void _appendHugeDecimalWithoutTypeBits(const Decimal128 dec, bool invert);
416  void _appendTinyDecimalWithoutTypeBits(const Decimal128 dec, const double bin, bool invert);
417 
418  template <typename T>
419  void _append(const T& thing, bool invert);
420  void _appendBytes(const void* source, size_t bytes, bool invert);
421 
423  StackBufBuilder _buffer;
424 };
425 
426 inline bool operator<(const KeyString& lhs, const KeyString& rhs) {
427  return lhs.compare(rhs) < 0;
428 }
429 
430 inline bool operator<=(const KeyString& lhs, const KeyString& rhs) {
431  return lhs.compare(rhs) <= 0;
432 }
433 
434 inline bool operator==(const KeyString& lhs, const KeyString& rhs) {
435  return lhs.compare(rhs) == 0;
436 }
437 
438 inline bool operator>(const KeyString& lhs, const KeyString& rhs) {
439  return lhs.compare(rhs) > 0;
440 }
441 
442 inline bool operator>=(const KeyString& lhs, const KeyString& rhs) {
443  return lhs.compare(rhs) >= 0;
444 }
445 
446 inline bool operator!=(const KeyString& lhs, const KeyString& rhs) {
447  return !(lhs == rhs);
448 }
449 
450 inline std::ostream& operator<<(std::ostream& stream, const KeyString& value) {
451  return stream << value.toString();
452 }
453 
454 } // namespace mongo
void reset()
Definition: key_string.h:176
void resetToKey(const BSONObj &obj, Ordering ord, RecordId recordId)
Definition: key_string.cpp:325
bool operator<(const KeyString &lhs, const KeyString &rhs)
Definition: key_string.h:426
KeyString(Version version)
Definition: key_string.h:284
void _appendBinData(const BSONBinData &val, bool invert)
Definition: key_string.cpp:486
void appendNumberDouble()
Definition: key_string.h:190
void _appendSymbol(StringData val, bool invert)
Definition: key_string.cpp:469
void appendRecordId(RecordId loc)
Definition: key_string.cpp:385
TypeBits _typeBits
Definition: key_string.h:422
void _appendStringLike(StringData str, bool invert)
– lowest level
Definition: key_string.cpp:859
void _appendDoubleWithoutTypeBits(const double num, DecimalContinuationMarker dcm, bool invert)
Definition: key_string.cpp:539
static BSONObj toBson(StringData data, Ordering ord, const TypeBits &types)
Definition: key_string.cpp:2070
void _appendNumberInt(const int num, bool invert)
Definition: key_string.cpp:617
bool operator==(const IndexKeyEntry &lhs, const IndexKeyEntry &rhs)
Definition: index_entry_comparison.h:54
void _appendAllElementsForIndexing(const BSONObj &obj, Ordering ord, Discriminator discriminator)
Definition: key_string.cpp:340
void _appendBson(const BSONObj &obj, bool invert)
Definition: key_string.cpp:875
Reader(const TypeBits &typeBits)
Passed in TypeBits must outlive this Reader instance.
Definition: key_string.h:215
size_t _curBit
Definition: key_string.h:258
static BSONObj toBsonSafe(const char *buffer, size_t len, Ordering ord, const TypeBits &types)
Definition: key_string.cpp:2038
Copyright (C) 2014 MongoDB Inc.
Definition: bson_collection_catalog_entry.cpp:38
void _appendBytes(const void *source, size_t bytes, bool invert)
Definition: key_string.cpp:1067
size_t getSize() const
Definition: key_string.h:131
void _appendInteger(const long long num, bool invert)
Definition: key_string.cpp:1024
static RecordId decodeRecordId(BufReader *reader)
Decodes a RecordId, consuming all bytes needed from reader.
Definition: key_string.cpp:2085
Definition: key_string.h:47
static RecordId decodeRecordIdAtEnd(const void *buf, size_t size)
Decodes a RecordId from the end of a buffer.
Definition: key_string.cpp:2074
void resetFromBuffer(const void *buffer, size_t size)
Definition: key_string.h:345
Discriminator
Definition: key_string.h:268
Definition: key_string.h:269
Definition: key_string.h:278
const TypeBits & _typeBits
Definition: key_string.h:237
Version
Selects version of KeyString to use.
Definition: key_string.h:52
bool isAllZeros() const
If true, no bits have been set to one.
Definition: key_string.h:101
static const Version kLatestVersion
Provides the latest version of KeyString available.
Definition: key_string.h:60
uint8_t getSizeByte() const
size only includes data bytes, not the size byte itself.
Definition: key_string.h:246
const TypeBits & getTypeBits() const
Definition: key_string.h:360
bool operator!=(const IndexKeyEntry &lhs, const IndexKeyEntry &rhs)
Definition: index_entry_comparison.h:58
uint8_t readStringLike()
Definition: key_string.h:217
bool operator>=(const KeyString &lhs, const KeyString &rhs)
Definition: key_string.h:442
void _appendRegex(const BSONRegEx &val, bool invert)
Definition: key_string.cpp:500
void _appendObject(const BSONObj &val, bool invert)
Definition: key_string.cpp:525
void appendSymbol()
Definition: key_string.h:186
void _appendString(StringData val, bool invert)
Definition: key_string.cpp:463
void appendNumberInt()
Definition: key_string.h:194
std::shared_ptr< void > data
Definition: ephemeral_for_test_record_store_test.cpp:74
StackBufBuilder _buffer
Definition: key_string.h:423
void _appendCodeWString(const BSONCodeWScope &val, bool invert)
Definition: key_string.cpp:480
const Version version
Definition: key_string.h:240
void _appendTimestamp(Timestamp val, bool invert)
Definition: key_string.cpp:453
static size_t getKeySize(const char *buffer, size_t len, Ordering ord, const TypeBits &typeBits)
Definition: key_string.cpp:2015
DecimalContinuationMarker
Encodes the kind of NumberDecimal that is stored.
Definition: key_string.h:277
Definition: key_string.h:210
Definition: key_string.h:270
void _appendBsonValue(const BSONElement &elem, bool invert, const StringData *name)
Definition: key_string.cpp:776
void _appendDate(Date_t val, bool invert)
Definition: key_string.cpp:445
void _appendDBRef(const BSONDBRef &val, bool invert)
Definition: key_string.cpp:509
void _appendSmallDouble(double value, DecimalContinuationMarker dcm, bool invert)
Definition: key_string.cpp:885
void appendString()
Definition: key_string.h:183
void _appendNumberDouble(const double num, bool invert)
Definition: key_string.cpp:530
std::ostream & operator<<(std::ostream &stream, const IndexKeyEntry &entry)
Definition: index_entry_comparison.cpp:37
static StringData versionToString(Version version)
Definition: key_string.h:53
bool isEmpty() const
Definition: key_string.h:356
int compare(const KeyString &other) const
Definition: key_string.cpp:2107
void _appendTinyDecimalWithoutTypeBits(const Decimal128 dec, const double bin, bool invert)
Definition: key_string.cpp:948
bool _isAllZeros
Definition: key_string.h:259
bool operator>(const KeyString &lhs, const KeyString &rhs)
Definition: key_string.h:438
std::string toString() const
Definition: key_string.cpp:2103
void appendTypeBits(const TypeBits &bits)
Definition: key_string.cpp:431
const uint8_t * getBuffer() const
These methods return a buffer and size which encodes all of the type bits in this instance...
Definition: key_string.h:128
Definition: key_string.h:271
TypeBits(Version version)
Definition: key_string.h:83
Encodes info needed to restore the original BSONTypes from a KeyString.
Definition: key_string.h:67
void resetToEmpty()
Resets to an empty state.
Definition: key_string.h:338
void _appendOID(OID val, bool invert)
Definition: key_string.cpp:458
void _appendArray(const BSONArray &val, bool invert)
Definition: key_string.cpp:516
void appendNumberLong()
Definition: key_string.h:198
void _appendHugeDecimalWithoutTypeBits(const Decimal128 dec, bool invert)
Definition: key_string.cpp:1005
void _appendNumberDecimal(const Decimal128 num, bool invert)
Definition: key_string.cpp:622
bool operator<=(const KeyString &lhs, const KeyString &rhs)
Definition: key_string.h:430
KeyString(Version version, RecordId rid)
Definition: key_string.h:299
uint8_t readNumeric()
Definition: key_string.h:220
void appendNumberDecimal()
Definition: key_string.h:202
size_t getSize() const
Definition: key_string.h:353
void _appendCode(StringData val, bool invert)
Definition: key_string.cpp:475
Database *const OperationContext *const const StringData name
Definition: database_impl.cpp:82
KeyString(Version version, const BSONObj &obj, Ordering ord, Discriminator discriminator=kInclusive)
Definition: key_string.h:291
size_t _curBit
Definition: key_string.h:236
void _appendLargeDouble(double value, DecimalContinuationMarker dcm, bool invert)
Definition: key_string.cpp:923
const Version version
Version to use for conversion to/from KeyString.
Definition: key_string.h:375
void _appendPreshiftedIntegerPortion(uint64_t value, bool isNegative, bool invert)
Definition: key_string.cpp:1043
static TypeBits fromBuffer(Version version, BufReader *reader)
Definition: key_string.h:92
void _appendBool(bool val, bool invert)
Definition: key_string.cpp:441
void _append(const T &thing, bool invert)
Definition: key_string.cpp:1063
KeyString(Version version, const BSONObj &obj, Ordering ord, RecordId recordId)
Definition: key_string.h:286
void setSizeByte(uint8_t size)
Definition: key_string.h:249
void resetFromBuffer(BufReader *reader)
If there are no bytes remaining, assumes AllZeros.
Definition: key_string.cpp:2129
const char * getBuffer() const
Definition: key_string.h:350
void _appendNumberLong(const long long num, bool invert)
Definition: key_string.cpp:612