Storage Engine API
record_store.h
Go to the documentation of this file.
1 // record_store.h
2 
31 #pragma once
32 
33 #include <boost/optional.hpp>
34 
35 #include "mongo/base/owned_pointer_vector.h"
36 #include "mongo/bson/mutable/damage_vector.h"
37 #include "mongo/db/exec/collection_scan_common.h"
38 #include "mongo/db/record_id.h"
41 
42 namespace mongo {
43 
44 class CappedCallback;
45 class Collection;
46 struct CompactOptions;
47 struct CompactStats;
48 class MAdvise;
49 class NamespaceDetails;
50 class OperationContext;
51 class RecordFetcher;
52 
53 class RecordStoreCompactAdaptor;
54 class RecordStore;
55 
56 struct ValidateResults;
57 class ValidateAdaptor;
58 
// Allows inserting a Record "in-place" without creating a copy ahead of time.
//
// Interface only: the two pure virtuals must be supplied by the implementer.
// RecordStore::insertRecordsWithDocWriter() consumes an array of these.
62 class DocWriter {
63 public:
    // Serializes the document into 'buf'.
    // NOTE(review): presumably 'buf' must hold at least documentSize() bytes - confirm with callers.
64  virtual void writeDocument(char* buf) const = 0;
    // Size of the document this writer will produce.
65  virtual size_t documentSize() const = 0;
    // Returns true unless overridden. How the flag is consumed is not visible in this header.
66  virtual bool addPadding() const {
67  return true;
68  }
69 
70 protected:
71  // Can't delete through base pointer.
    // Protected and non-virtual: owners must destroy the concrete type directly.
72  ~DocWriter() = default;
73 };
74 
// NOTE(review): the class header (listing line 78) is missing from this listing. The
// destructor below names the class UpdateNotifier; RecordStore::updateRecord() accepts an
// UpdateNotifier* as its 'notifier' argument.
79 public:
80  virtual ~UpdateNotifier() {}
    // Callback taking the operation context and the RecordId about to be updated in place.
    // Returns a Status; how a non-OK Status is handled is decided by the caller (not shown here).
81  virtual Status recordStoreGoingToUpdateInPlace(OperationContext* opCtx,
82  const RecordId& loc) = 0;
83 };
84 
// The data items stored in a RecordStore.
88 struct Record {
    // Identifier of the record within its RecordStore.
89  RecordId id;
    // NOTE(review): listing line 90 is missing here. The page's member index lists
    // "RecordData data" at that line, and RecordStore::findRecord() reads record->data.
91 };
92 
// Level selector passed as the 'level' argument of RecordStore::validate().
// NOTE(review): the enumerators (listing lines 94-96) are missing from this listing,
// so their names and values cannot be documented from here.
93 enum ValidateCmdLevel : int {
97 };
98 
99 
// Retrieves Records from a RecordStore.
// NOTE(review): the class header (listing line 141) is missing from this listing; the
// destructor below names the class RecordCursor.
142 public:
143  virtual ~RecordCursor() = default;
144 
     // Moves forward and returns the new data or boost::none if there is no more data.
149  virtual boost::optional<Record> next() = 0;
150 
151  //
152  // Saving and restoring state
153  //
154 
     // Prepares for state changes in underlying data (the full contract is truncated in the
     // page's member index - consult the original header).
162  virtual void save() = 0;
163 
     // Recovers from potential state changes in underlying data.
     // NOTE(review): the meaning of the bool result is not stated in this listing.
177  virtual bool restore() = 0;
178 
     // Detaches from the OperationContext and releases any storage-engine state.
186  virtual void detachFromOperationContext() = 0;
187 
     // Reattaches to the OperationContext and reacquires any storage-engine state.
194  virtual void reattachToOperationContext(OperationContext* opCtx) = 0;
195 
     // Inform the cursor that this id is being invalidated. The default does nothing.
203  virtual void invalidate(OperationContext* opCtx, const RecordId& id) {}
204 
205  //
206  // RecordFetchers
207  //
208  // Storage engines which do not support document-level locking hold locks at collection or
209  // database granularity. As an optimization, these locks can be yielded when a record needs
210  // to be fetched from secondary storage. If this method returns non-NULL, then it indicates
211  // that the query system layer should yield its locks, following the protocol defined by the
212  // RecordFetcher class, so that a potential page fault is triggered out of the lock.
213  //
214  // Storage engines which support document-level locking need not implement this.
215  //
216  // TODO see if these can be replaced by WriteConflictException.
217  //
218 
     // Returns a RecordFetcher if needed for a call to next() or none if unneeded.
     // The default returns an empty pointer, i.e. no fetch is required.
222  virtual std::unique_ptr<RecordFetcher> fetcherForNext() const {
223  return {};
224  }
225 };
226 
// Adds explicit seeking of records.
// NOTE(review): the class header (listing line 236) is missing from this listing. This is
// presumably SeekableRecordCursor, the type returned by RecordStore::getCursor(), whose
// seekExact() is called from RecordStore::findRecord(). The body calls save(), so it
// presumably derives from RecordCursor - confirm against the original header.
237 public:
     // Seeks to a Record with the provided id. RecordStore::findRecord() treats an empty
     // result as "not found".
244  virtual boost::optional<Record> seekExact(const RecordId& id) = 0;
245 
     // Prepares for state changes in underlying data without necessarily saving the current
     // state. The default simply forwards to save().
256  virtual void saveUnpositioned() {
257  save();
258  }
259 
     // Returns a RecordFetcher if needed to fetch the provided Record or none if unneeded.
     // The default returns an empty pointer.
263  virtual std::unique_ptr<RecordFetcher> fetcherForId(const RecordId& id) const {
264  return {};
265  }
266 };
267 
// An abstraction used for storing documents in a collection or entries in an index.
//
// Most members are pure virtual. The few with bodies are defaults that either delegate to
// other members (findRecord, insertRecords, getManyCursors), report "not supported", or hit
// MONGO_UNREACHABLE.
282 class RecordStore {
     // NOTE(review): listing line 283 is missing here; the page's member index lists
     // MONGO_DISALLOW_COPYING(RecordStore) for this class.
284 
285 public:
     // Stores a copy of the namespace string in _ns.
286  RecordStore(StringData ns) : _ns(ns.toString()) {}
287 
288  virtual ~RecordStore() {}
289 
290  // META
291 
292  // name of the RecordStore implementation
293  virtual const char* name() const = 0;
294 
     // Returns the namespace string captured by the constructor.
295  virtual const std::string& ns() const {
296  return _ns;
297  }
298 
299  virtual const std::string& getIdent() const = 0;
300 
     // An approximation of the sum of the sizes (in bytes) of the documents or entries.
305  virtual long long dataSize(OperationContext* opCtx) const = 0;
306 
     // Total number of records in the RecordStore.
311  virtual long long numRecords(OperationContext* opCtx) const = 0;
312 
313  virtual bool isCapped() const = 0;
314 
     // NOTE(review): listing line 315 (the signature owning the body below) is missing. The
     // page's member index places "virtual void setCappedCallback(CappedCallback *)" at that
     // line; its default body is MONGO_UNREACHABLE.
316  MONGO_UNREACHABLE;
317  }
318 
     // Storage size query; 'extraInfo' and 'infoLevel' are optional (default NULL / 0).
     // NOTE(review): units and the meaning of infoLevel are not stated in this listing.
324  virtual int64_t storageSize(OperationContext* opCtx,
325  BSONObjBuilder* extraInfo = NULL,
326  int infoLevel = 0) const = 0;
327 
328  // CRUD related
329 
     // Get the RecordData at loc, which must exist: a failed findRecord() trips invariant().
     // NOTE(review): listing line 340 is missing; it presumably declares the local 'data'.
339  virtual RecordData dataFor(OperationContext* opCtx, const RecordId& loc) const {
341  invariant(findRecord(opCtx, loc, &data));
342  return data;
343  }
344 
     // Default lookup: opens a cursor, seeks to 'loc', and returns false if nothing is found.
     // On success the record's data is made owned, moved into *out, and true is returned.
359  virtual bool findRecord(OperationContext* opCtx, const RecordId& loc, RecordData* out) const {
360  auto cursor = getCursor(opCtx);
361  auto record = cursor->seekExact(loc);
362  if (!record)
363  return false;
364 
365  record->data.makeOwned(); // Unowned data expires when cursor goes out of scope.
366  *out = std::move(record->data);
367  return true;
368  }
369 
370  virtual void deleteRecord(OperationContext* opCtx, const RecordId& dl) = 0;
371 
372  virtual StatusWith<RecordId> insertRecord(OperationContext* opCtx,
373  const char* data,
374  int len,
375  Timestamp timestamp,
376  bool enforceQuota) = 0;
377 
     // Default bulk insert: walks 'records' in order, pairing the i-th record with
     // (*timestamps)[i]. Stops at the first failure and returns its Status; otherwise writes
     // each resulting RecordId back into record.id and returns OK.
     // 'timestamps' is indexed without a bounds check, so it must have at least as many
     // entries as 'records'.
378  virtual Status insertRecords(OperationContext* opCtx,
379  std::vector<Record>* records,
380  std::vector<Timestamp>* timestamps,
381  bool enforceQuota) {
382  int index = 0;
383  for (auto& record : *records) {
     // NOTE(review): listing line 384 is missing; it presumably reads
     // "auto res = insertRecord(opCtx," given the argument list that follows - confirm.
385  record.data.data(),
386  record.data.size(),
387  (*timestamps)[index++],
388  enforceQuota);
389  if (!res.isOK())
390  return res.getStatus();
391 
392  record.id = res.getValue();
393  }
394  return Status::OK();
395  }
396 
     // Inserts nDocs documents into this RecordStore using the DocWriter interface.
     // 'idsOut' is optional (defaults to nullptr).
406  virtual Status insertRecordsWithDocWriter(OperationContext* opCtx,
407  const DocWriter* const* docs,
408  const Timestamp* timestamps,
409  size_t nDocs,
410  RecordId* idsOut = nullptr) = 0;
411 
     // A thin wrapper around insertRecordsWithDocWriter() to simplify handling of single
     // DocWriters: inserts one document and returns its RecordId, or the failing Status.
     // NOTE(review): listing line 415 (first line of the signature) is missing. The page's
     // member index gives it as
     // "StatusWith<RecordId> insertRecordWithDocWriter(OperationContext* opCtx, ...".
416  const DocWriter* doc,
417  Timestamp timestamp) {
418  RecordId out;
419  Status status = insertRecordsWithDocWriter(opCtx, &doc, &timestamp, 1, &out);
420  if (!status.isOK())
421  return status;
422  return out;
423  }
424 
437  virtual Status updateRecord(OperationContext* opCtx,
438  const RecordId& oldLocation,
439  const char* data,
440  int len,
441  bool enforceQuota,
442  UpdateNotifier* notifier) = 0;
443 
451  virtual bool updateWithDamagesSupported() const = 0;
452 
     // Updates the record positioned at 'loc' in-place using the deltas described by 'damages'.
461  virtual StatusWith<RecordData> updateWithDamages(OperationContext* opCtx,
462  const RecordId& loc,
463  const RecordData& oldRec,
464  const char* damageSource,
465  const mutablebson::DamageVector& damages) = 0;
466 
     // Returns a new cursor over this record store. 'forward' defaults to true.
475  virtual std::unique_ptr<SeekableRecordCursor> getCursor(OperationContext* opCtx,
476  bool forward = true) const = 0;
477 
     // Constructs a cursor over a potentially corrupted store, which can be used to salvage
     // damaged records. The default returns an empty pointer.
483  virtual std::unique_ptr<RecordCursor> getCursorForRepair(OperationContext* opCtx) const {
484  return {};
485  }
486 
     // Constructs a cursor over a record store that returns documents in a randomized order.
     // The default returns an empty pointer.
498  virtual std::unique_ptr<RecordCursor> getRandomCursor(OperationContext* opCtx) const {
499  return {};
500  }
501 
     // Returns many RecordCursors that partition the RecordStore into many disjoint sets.
     // The default returns a single-element vector holding one getCursor() cursor.
506  virtual std::vector<std::unique_ptr<RecordCursor>> getManyCursors(
507  OperationContext* opCtx) const {
508  std::vector<std::unique_ptr<RecordCursor>> out(1);
509  out[0] = getCursor(opCtx);
510  return out;
511  }
512 
513  // higher level
514 
515 
     // Removes all Records.
519  virtual Status truncate(OperationContext* opCtx) = 0;
520 
     // Truncate documents newer than the document at 'end' from the capped collection.
527  virtual void cappedTruncateAfter(OperationContext* opCtx, RecordId end, bool inclusive) = 0;
528 
     // Does this RecordStore support the compact operation? Defaults to false.
534  virtual bool compactSupported() const {
535  return false;
536  }
537 
     // Does compact() leave RecordIds alone or can they change.
     // The default is MONGO_UNREACHABLE.
543  virtual bool compactsInPlace() const {
544  MONGO_UNREACHABLE;
545  }
546 
     // Attempt to reduce the storage space used by this RecordStore.
     // The default is MONGO_UNREACHABLE.
553  virtual Status compact(OperationContext* opCtx,
554  RecordStoreCompactAdaptor* adaptor,
555  const CompactOptions* options,
556  CompactStats* stats) {
557  MONGO_UNREACHABLE;
558  }
559 
     // Does the RecordStore cursor retrieve its document in RecordId order? Defaults to false.
569  virtual bool isInRecordIdOrder() const {
570  return false;
571  }
572 
578  virtual Status validate(OperationContext* opCtx,
579  ValidateCmdLevel level,
580  ValidateAdaptor* adaptor,
581  ValidateResults* results,
582  BSONObjBuilder* output) = 0;
583 
588  virtual void appendCustomStats(OperationContext* opCtx,
589  BSONObjBuilder* result,
590  double scale) const = 0;
591 
     // Load all data into cache. The default reports CommandNotSupported.
601  virtual Status touch(OperationContext* opCtx, BSONObjBuilder* output) const {
602  return Status(ErrorCodes::CommandNotSupported,
603  "this storage engine does not support touch");
604  }
605 
     // Return the RecordId of an oplog entry as close to startingPosition as possible without
     // being higher. The default returns boost::none.
613  virtual boost::optional<RecordId> oplogStartHack(OperationContext* opCtx,
614  const RecordId& startingPosition) const {
615  return boost::none;
616  }
617 
     // Called when writing to an oplog (the full description is truncated in the page's
     // member index). The default does nothing and returns OK.
631  virtual Status oplogDiskLocRegister(OperationContext* opCtx,
632  const Timestamp& opTime,
633  bool orderedCommit) {
634  return Status::OK();
635  }
636 
     // Waits for all writes that completed before this call to be visible to forward scans.
644  virtual void waitForAllEarlierOplogWritesToBeVisible(OperationContext* opCtx) const = 0;
645 
     // Called after a repair operation is run with the recomputed numRecords and dataSize.
649  virtual void updateStatsAfterRepair(OperationContext* opCtx,
650  long long numRecords,
651  long long dataSize) = 0;
652 
     // Used to support changing the oplog size online. The default reports CommandNotSupported.
656  virtual Status updateCappedSize(OperationContext* opCtx, long long cappedSize) {
657  return Status(ErrorCodes::CommandNotSupported,
658  "this storage engine does not support updateCappedSize");
659  }
660 
661 protected:
     // Namespace string supplied to the constructor; returned by ns().
662  std::string _ns;
663 };
664 
// NOTE(review): the class header (listing line 665) and the virtual destructor (listing line
// 667) are missing from this listing. The page's member index places
// "virtual ~RecordStoreCompactAdaptor()" at line 667, so this is the visible remainder of
// RecordStoreCompactAdaptor, the 'adaptor' type taken by RecordStore::compact().
666 public:
     // Three pure-virtual callbacks; exactly when compact() invokes each is not visible here.
668  virtual bool isDataValid(const RecordData& recData) = 0;
669  virtual size_t dataSize(const RecordData& recData) = 0;
670  virtual void inserted(const RecordData& recData, const RecordId& newLocation) = 0;
671 };
672 
// NOTE(review): the struct header (listing line 673) and the constructor signature (listing
// line 674) are missing from this listing. The page's member index places "ValidateResults()"
// at line 674, so the two lines below are the body of that constructor.
     // Results start out valid.
675  valid = true;
676  }
     // Overall verdict; initialised to true by the constructor.
677  bool valid;
     // Error messages collected during validation.
678  std::vector<std::string> errors;
     // Warning messages collected during validation.
679  std::vector<std::string> warnings;
680 };
681 
// Callback interface so that a RecordStore validating all records can call back to someone
// to check a record (the description is truncated in the page's member index).
// NOTE(review): the class header (listing line 687) is missing from this listing; the
// destructor below names the class ValidateAdaptor.
688 public:
689  virtual ~ValidateAdaptor() {}
690 
     // Validates one record. 'dataSize' is an out-parameter.
     // NOTE(review): presumably it receives the record's validated size - confirm with
     // implementers.
691  virtual Status validate(const RecordId& recordId,
692  const RecordData& recordData,
693  size_t* dataSize) = 0;
694 };
695 }
virtual int64_t storageSize(OperationContext *opCtx, BSONObjBuilder *extraInfo=NULL, int infoLevel=0) const =0
Definition: record_store.h:665
AtomicLockStats stats
Definition: lock_state.cpp:92
ValidateCmdLevel
Definition: record_store.h:93
virtual Status recordStoreGoingToUpdateInPlace(OperationContext *opCtx, const RecordId &loc)=0
Definition: record_store.h:94
virtual bool restore()=0
Recovers from potential state changes in underlying data.
virtual void reattachToOperationContext(OperationContext *opCtx)=0
Reattaches to the OperationContext and reacquires any storage-engine state.
Status status
Definition: database_impl.cpp:1020
virtual Status insertRecords(OperationContext *opCtx, std::vector< Record > *records, std::vector< Timestamp > *timestamps, bool enforceQuota)
Definition: record_store.h:378
virtual std::vector< std::unique_ptr< RecordCursor > > getManyCursors(OperationContext *opCtx) const
Returns many RecordCursors that partition the RecordStore into many disjoint sets.
Definition: record_store.h:506
Definition: record_store.h:673
virtual Status insertRecordsWithDocWriter(OperationContext *opCtx, const DocWriter *const *docs, const Timestamp *timestamps, size_t nDocs, RecordId *idsOut=nullptr)=0
Inserts nDocs documents into this RecordStore using the DocWriter interface.
virtual Status oplogDiskLocRegister(OperationContext *opCtx, const Timestamp &opTime, bool orderedCommit)
When we write to an oplog, we call this so that if the storage engine supports doc locking,...
Definition: record_store.h:631
virtual std::unique_ptr< RecordFetcher > fetcherForNext() const
Returns a RecordFetcher if needed for a call to next() or none if unneeded.
Definition: record_store.h:222
Copyright (C) 2014 MongoDB Inc.
Definition: bson_collection_catalog_entry.cpp:38
std::vector< std::string > errors
Definition: record_store.h:678
virtual bool compactsInPlace() const
Does compact() leave RecordIds alone or can they change.
Definition: record_store.h:543
virtual std::unique_ptr< RecordCursor > getRandomCursor(OperationContext *opCtx) const
Constructs a cursor over a record store that returns documents in a randomized order,...
Definition: record_store.h:498
MONGO_DISALLOW_COPYING(RecordStore)
virtual void inserted(const RecordData &recData, const RecordId &newLocation)=0
virtual bool updateWithDamagesSupported() const =0
A replacement for the Record class.
Definition: record_data.h:43
virtual void updateStatsAfterRepair(OperationContext *opCtx, long long numRecords, long long dataSize)=0
Called after a repair operation is run with the recomputed numRecords and dataSize.
StatusWith< RecordId > insertRecordWithDocWriter(OperationContext *opCtx, const DocWriter *doc, Timestamp timestamp)
A thin wrapper around insertRecordsWithDocWriter() to simplify handling of single DocWriters.
Definition: record_store.h:415
virtual const std::string & getIdent() const =0
Definition: record_store.h:96
virtual bool isCapped() const =0
This is so when a RecordStore is validating all records it can call back to someone to check if a rec...
Definition: record_store.h:687
virtual const char * name() const =0
virtual bool isDataValid(const RecordData &recData)=0
Definition: record_store.h:78
virtual Status truncate(OperationContext *opCtx)=0
removes all Records
virtual boost::optional< Record > next()=0
Moves forward and returns the new data or boost::none if there is no more data.
virtual StatusWith< RecordId > insertRecord(OperationContext *opCtx, const char *data, int len, Timestamp timestamp, bool enforceQuota)=0
virtual bool compactSupported() const
does this RecordStore support the compact operation?
Definition: record_store.h:534
bool inclusive
Definition: btree_interface.cpp:335
virtual void waitForAllEarlierOplogWritesToBeVisible(OperationContext *opCtx) const =0
Waits for all writes that completed before this call to be visible to forward scans.
virtual Status validate(const RecordId &recordId, const RecordData &recordData, size_t *dataSize)=0
virtual ~RecordCursor()=default
virtual ~RecordStore()
Definition: record_store.h:288
virtual bool isInRecordIdOrder() const
Does the RecordStore cursor retrieve its document in RecordId Order?
Definition: record_store.h:569
Definition: collection.h:77
Allows inserting a Record "in-place" without creating a copy ahead of time.
Definition: record_store.h:62
std::shared_ptr< void > data
Definition: ephemeral_for_test_record_store_test.cpp:74
The data items stored in a RecordStore.
Definition: record_store.h:88
virtual void invalidate(OperationContext *opCtx, const RecordId &id)
Inform the cursor that this id is being invalidated.
Definition: record_store.h:203
virtual void deleteRecord(OperationContext *opCtx, const RecordId &dl)=0
virtual StatusWith< RecordData > updateWithDamages(OperationContext *opCtx, const RecordId &loc, const RecordData &oldRec, const char *damageSource, const mutablebson::DamageVector &damages)=0
Updates the record positioned at 'loc' in-place using the deltas described by 'damages'.
OperationContext Database StringData CollectionOptions bool const BSONObj &idIndex Status
Definition: database_impl.cpp:955
virtual void detachFromOperationContext()=0
Detaches from the OperationContext and releases any storage-engine state.
virtual void appendCustomStats(OperationContext *opCtx, BSONObjBuilder *result, double scale) const =0
virtual std::unique_ptr< SeekableRecordCursor > getCursor(OperationContext *opCtx, bool forward=true) const =0
Returns a new cursor over this record store.
virtual size_t dataSize(const RecordData &recData)=0
Definition: index_key_validate.h:40
virtual std::unique_ptr< RecordFetcher > fetcherForId(const RecordId &id) const
Returns a RecordFetcher if needed to fetch the provided Record or none if unneeded.
Definition: record_store.h:263
virtual Status updateRecord(OperationContext *opCtx, const RecordId &oldLocation, const char *data, int len, bool enforceQuota, UpdateNotifier *notifier)=0
virtual void saveUnpositioned()
Prepares for state changes in underlying data without necessarily saving the current state.
Definition: record_store.h:256
RecordId id
Definition: record_store.h:89
Retrieves Records from a RecordStore.
Definition: record_store.h:141
virtual RecordData dataFor(OperationContext *opCtx, const RecordId &loc) const
Get the RecordData at loc, which must exist.
Definition: record_store.h:339
An abstraction used for storing documents in a collection or entries in an index.
Definition: record_store.h:282
When a capped collection is modified (delete/insert/etc) then certain notifications need to be made,...
Definition: capped_callback.h:44
virtual const std::string & ns() const
Definition: record_store.h:295
virtual bool findRecord(OperationContext *opCtx, const RecordId &loc, RecordData *out) const
Definition: record_store.h:359
virtual void save()=0
Prepares for state changes in underlying data in a way that allows the cursor's current position to b...
virtual long long numRecords(OperationContext *opCtx) const =0
Total number of records in the RecordStore.
virtual ~ValidateAdaptor()
Definition: record_store.h:689
virtual boost::optional< RecordId > oplogStartHack(OperationContext *opCtx, const RecordId &startingPosition) const
Return the RecordId of an oplog entry as close to startingPosition as possible without being higher.
Definition: record_store.h:613
virtual ~RecordStoreCompactAdaptor()
Definition: record_store.h:667
virtual size_t documentSize() const =0
virtual void writeDocument(char *buf) const =0
virtual std::unique_ptr< RecordCursor > getCursorForRepair(OperationContext *opCtx) const
Constructs a cursor over a potentially corrupted store, which can be used to salvage damaged records.
Definition: record_store.h:483
Adds explicit seeking of records.
Definition: record_store.h:236
virtual Status compact(OperationContext *opCtx, RecordStoreCompactAdaptor *adaptor, const CompactOptions *options, CompactStats *stats)
Attempt to reduce the storage space used by this RecordStore.
Definition: record_store.h:553
std::vector< std::string > warnings
Definition: record_store.h:679
Definition: collection.h:97
virtual Status touch(OperationContext *opCtx, BSONObjBuilder *output) const
Load all data into cache.
Definition: record_store.h:601
bool valid
Definition: record_store.h:677
virtual void cappedTruncateAfter(OperationContext *opCtx, RecordId end, bool inclusive)=0
Truncate documents newer than the document at 'end' from the capped collection.
RecordData data
Definition: record_store.h:90
virtual bool addPadding() const
Definition: record_store.h:66
virtual void setCappedCallback(CappedCallback *)
Definition: record_store.h:315
virtual Status validate(OperationContext *opCtx, ValidateCmdLevel level, ValidateAdaptor *adaptor, ValidateResults *results, BSONObjBuilder *output)=0
virtual Status updateCappedSize(OperationContext *opCtx, long long cappedSize)
Used to support changing the oplog size online.
Definition: record_store.h:656
Collection *const OperationContext *const opCtx
Definition: collection_impl.cpp:80
RecordStore(StringData ns)
Definition: record_store.h:286
~DocWriter()=default
Definition: record_store.h:95
std::string _ns
Definition: record_store.h:662
virtual long long dataSize(OperationContext *opCtx) const =0
The dataSize is an approximation of the sum of the sizes (in bytes) of the documents or entries in th...
virtual boost::optional< Record > seekExact(const RecordId &id)=0
Seeks to a Record with the provided id.
ValidateResults()
Definition: record_store.h:674
virtual ~UpdateNotifier()
Definition: record_store.h:80