Storage Engine API
wiredtiger_kv_engine.h
Go to the documentation of this file.
1 // wiredtiger_kv_engine.h
2 
32 #pragma once
33 
34 #include <list>
35 #include <memory>
36 #include <string>
37 
38 #include <wiredtiger.h>
39 
40 #include "mongo/bson/ordering.h"
41 #include "mongo/bson/timestamp.h"
45 #include "mongo/stdx/functional.h"
46 #include "mongo/stdx/mutex.h"
47 #include "mongo/util/elapsed_tracker.h"
48 
49 namespace mongo {
50 
51 class ClockSource;
52 class JournalListener;
53 class WiredTigerRecordStore;
54 class WiredTigerSessionCache;
55 class WiredTigerSizeStorer;
56 
58  enum class StartupVersion { IS_34, IS_36, IS_40 };
59 
61  bool shouldDowngrade(bool readOnly, bool repairMode, bool hasRecoveryTimestamp);
62  std::string getDowngradeString();
63 };
64 
65 class WiredTigerKVEngine final : public KVEngine {
66 public:
67  static const int kDefaultJournalDelayMillis;
68  WiredTigerKVEngine(const std::string& canonicalName,
69  const std::string& path,
70  ClockSource* cs,
71  const std::string& extraOpenOptions,
72  size_t cacheSizeGB,
73  bool durable,
74  bool ephemeral,
75  bool repair,
76  bool readOnly);
77 
78  virtual ~WiredTigerKVEngine();
79 
80  void setRecordStoreExtraOptions(const std::string& options);
81  void setSortedDataInterfaceExtraOptions(const std::string& options);
82 
83  virtual bool supportsDocLocking() const;
84 
85  virtual bool supportsDirectoryPerDB() const;
86 
87  virtual bool isDurable() const {  // Reports the value of the _durable member flag.
88  return _durable;
89  }
90 
91  virtual bool isEphemeral() const {  // Reports the value of the _ephemeral member flag.
92  return _ephemeral;
93  }
94 
95  virtual RecoveryUnit* newRecoveryUnit();
96 
97  virtual Status createRecordStore(OperationContext* opCtx,  // Non-grouped convenience overload.
98  StringData ns,
99  StringData ident,
100  const CollectionOptions& options) {
101  return createGroupedRecordStore(opCtx, ns, ident, options, KVPrefix::kNotPrefixed);  // Forwards all arguments unchanged, with the "not prefixed" KVPrefix.
102  }
103 
104  virtual std::unique_ptr<RecordStore> getRecordStore(OperationContext* opCtx,  // Non-grouped convenience overload.
105  StringData ns,
106  StringData ident,
107  const CollectionOptions& options) {
108  return getGroupedRecordStore(opCtx, ns, ident, options, KVPrefix::kNotPrefixed);  // Forwards all arguments unchanged, with the "not prefixed" KVPrefix.
109  }
110 
111  virtual Status createSortedDataInterface(OperationContext* opCtx,  // Non-grouped convenience overload.
112  StringData ident,
113  const IndexDescriptor* desc) {
114  return createGroupedSortedDataInterface(opCtx, ident, desc, KVPrefix::kNotPrefixed);  // Forwards all arguments unchanged, with the "not prefixed" KVPrefix.
115  }
116 
118  StringData ident,
119  const IndexDescriptor* desc) {
120  return getGroupedSortedDataInterface(opCtx, ident, desc, KVPrefix::kNotPrefixed);
121  }
122 
123  virtual Status createGroupedRecordStore(OperationContext* opCtx,
124  StringData ns,
125  StringData ident,
126  const CollectionOptions& options,
127  KVPrefix prefix);
128 
129  virtual std::unique_ptr<RecordStore> getGroupedRecordStore(OperationContext* opCtx,
130  StringData ns,
131  StringData ident,
132  const CollectionOptions& options,
133  KVPrefix prefix);
134 
135  virtual Status createGroupedSortedDataInterface(OperationContext* opCtx,
136  StringData ident,
137  const IndexDescriptor* desc,
138  KVPrefix prefix);
139 
140  virtual SortedDataInterface* getGroupedSortedDataInterface(OperationContext* opCtx,
141  StringData ident,
142  const IndexDescriptor* desc,
143  KVPrefix prefix);
144 
145  virtual Status dropIdent(OperationContext* opCtx, StringData ident);
146 
147  virtual void alterIdentMetadata(OperationContext* opCtx,
148  StringData ident,
149  const IndexDescriptor* desc);
150 
151  virtual Status okToRename(OperationContext* opCtx,
152  StringData fromNS,
153  StringData toNS,
154  StringData ident,
155  const RecordStore* originalRecordStore) const;
156 
157  virtual int flushAllFiles(OperationContext* opCtx, bool sync);
158 
159  virtual Status beginBackup(OperationContext* opCtx);
160 
161  virtual void endBackup(OperationContext* opCtx);
162 
163  virtual int64_t getIdentSize(OperationContext* opCtx, StringData ident);
164 
165  virtual Status repairIdent(OperationContext* opCtx, StringData ident);
166 
167  virtual bool hasIdent(OperationContext* opCtx, StringData ident) const;
168 
169  std::vector<std::string> getAllIdents(OperationContext* opCtx) const;
170 
171  virtual void cleanShutdown();
172 
174  return &_sessionCache->snapshotManager();
175  }
176 
177  void setJournalListener(JournalListener* jl) final;
178 
179  virtual void setStableTimestamp(Timestamp stableTimestamp) override;
180 
181  virtual void setInitialDataTimestamp(Timestamp initialDataTimestamp) override;
182 
183  virtual void setOldestTimestampFromStable() override;
184 
185  virtual void setOldestTimestamp(Timestamp newOldestTimestamp) override;
186 
187  virtual bool supportsRecoverToStableTimestamp() const override;
188 
189  virtual StatusWith<Timestamp> recoverToStableTimestamp(OperationContext* opCtx) override;
190 
191  virtual boost::optional<Timestamp> getRecoveryTimestamp() const override;
192 
198  virtual boost::optional<Timestamp> getLastStableCheckpointTimestamp() const override;
199 
200  virtual Timestamp getAllCommittedTimestamp() const override;
201 
202  bool supportsReadConcernSnapshot() const final;
203 
204  // wiredtiger specific
205  // Calls WT_CONNECTION::reconfigure on the underlying WT_CONNECTION
206  // held by this class
207  int reconfigure(const char* str);
208 
209  WT_CONNECTION* getConnection() {  // Accessor for the WT_CONNECTION pointer stored in _conn.
210  return _conn;
211  }
212  void dropSomeQueuedIdents();
213  std::list<WiredTigerCachedCursor> filterCursorsWithQueuedDrops(
214  std::list<WiredTigerCachedCursor>* cache);
215  bool haveDropsQueued() const;
216 
217  void syncSizeInfo(bool sync) const;
218 
219  /*
220  * An oplog manager is always accessible, but this method will start the background thread to
221  * control oplog entry visibility for reads.
222  *
223  * On mongod, the background thread will be started when the first oplog record store is
224  * created, and stopped when the last oplog record store is destroyed, at shutdown time. For
225  * unit tests, the background thread may be started and stopped multiple times as tests create
226  * and destroy oplog record stores.
227  */
228  void startOplogManager(OperationContext* opCtx,
229  const std::string& uri,
230  WiredTigerRecordStore* oplogRecordStore);
231  void haltOplogManager();
232 
233  /*
234  * Always returns a non-null pointer. However, the WiredTigerOplogManager may not have been
235  * initialized and its background refreshing thread may not be running.
236  *
237  * A caller that wants to get the oplog read timestamp, or call
238  * `waitForAllEarlierOplogWritesToBeVisible`, is advised to first see if the oplog manager is
239  * running with a call to `isRunning`.
240  *
241  * A caller that simply wants to call `triggerJournalFlush` may do so without concern.
242  */
244  return _oplogManager.get();
245  }
246 
247  /*
248  * This function is called when replication has completed a batch. In this function, we
249  * refresh our oplog visibility read-at-timestamp value.
250  */
251  void replicationBatchIsComplete() const override;
252 
258  static void setInitRsOplogBackgroundThreadCallback(stdx::function<bool(StringData)> cb);
259 
266  static bool initRsOplogBackgroundThread(StringData ns);
267 
268  static void appendGlobalStats(BSONObjBuilder& b);
269 
270  bool isCacheUnderPressure(OperationContext* opCtx) const override;
271 
276  Timestamp getStableTimestamp() const;
277  Timestamp getOldestTimestamp() const;
278 
279 private:
282 
283  Status _salvageIfNeeded(const char* uri);
284  void _checkIdentPath(StringData ident);
285 
286  bool _hasUri(WT_SESSION* session, const std::string& uri) const;
287 
288  std::string _uri(StringData ident) const;
289 
298  Timestamp _calculateHistoryLagFromStableTimestamp(Timestamp stableTimestamp);
299 
305  void _setOldestTimestamp(Timestamp newOldestTimestamp, bool force);
306 
307  WT_CONNECTION* _conn;
308  WT_EVENT_HANDLER _eventHandler;
309  std::unique_ptr<WiredTigerSessionCache> _sessionCache;
310  ClockSource* const _clockSource;
311 
312  // Mutex to protect use of _oplogManagerCount by this instance of KV engine.
313  mutable stdx::mutex _oplogManagerMutex;
314  std::size_t _oplogManagerCount = 0;
315  std::unique_ptr<WiredTigerOplogManager> _oplogManager;
316 
317  std::string _canonicalName;
318  std::string _path;
319  std::string _wtOpenConfig;
320 
321  std::unique_ptr<WiredTigerSizeStorer> _sizeStorer;
322  std::string _sizeStorerUri;
323  mutable ElapsedTracker _sizeStorerSyncTracker;
324 
325  bool _durable;
327  const bool _inRepairMode;
328  bool _readOnly;
329  std::unique_ptr<WiredTigerJournalFlusher> _journalFlusher; // Depends on _sizeStorer
330  std::unique_ptr<WiredTigerCheckpointThread> _checkpointThread;
331 
332  std::string _rsOptions;
333  std::string _indexOptions;
334 
335  mutable stdx::mutex _dropAllQueuesMutex;
336  mutable stdx::mutex _identToDropMutex;
337  std::list<std::string> _identToDrop;
338 
340 
341  std::unique_ptr<WiredTigerSession> _backupSession;
344 
345  // Ensures accesses to _oldestTimestamp and _stableTimestamp, respectively, are multi-core safe.
346  mutable stdx::mutex _oldestTimestampMutex;
347  mutable stdx::mutex _stableTimestampMutex;
348 
349  // Tracks the stable and oldest timestamps we've set on the storage engine.
350  Timestamp _oldestTimestamp;
351  Timestamp _stableTimestamp;
352 };
353 }
Definition: wiredtiger_kv_engine.cpp:250
std::string _canonicalName
Definition: wiredtiger_kv_engine.h:317
StartupVersion _startupVersion
Definition: wiredtiger_kv_engine.h:60
WT_CONNECTION * getConnection()
Definition: wiredtiger_kv_engine.h:209
Definition: kv_engine.h:53
virtual bool isEphemeral() const
Returns true if the KVEngine is ephemeral – that is, it is NOT persistent and all data is lost after...
Definition: wiredtiger_kv_engine.h:91
Definition: wiredtiger_kv_engine.cpp:206
virtual bool isDurable() const
Definition: wiredtiger_kv_engine.h:87
std::string _wtOpenConfig
Definition: wiredtiger_kv_engine.h:319
stdx::mutex _stableTimestampMutex
Definition: wiredtiger_kv_engine.h:347
Definition: wiredtiger_record_store.h:73
std::unique_ptr< WiredTigerCheckpointThread > _checkpointThread
Definition: wiredtiger_kv_engine.h:330
stdx::mutex _oplogManagerMutex
Definition: wiredtiger_kv_engine.h:313
void setJournalListener(JournalListener *jl)
Definition: dur.cpp:906
Collection *const const NamespaceString & ns
Definition: collection_info_cache_impl.cpp:53
virtual Status createRecordStore(OperationContext *opCtx, StringData ns, StringData ident, const CollectionOptions &options)
The create and drop methods on KVEngine are not transactional.
Definition: wiredtiger_kv_engine.h:97
virtual SortedDataInterface * getSortedDataInterface(OperationContext *opCtx, StringData ident, const IndexDescriptor *desc)
Definition: wiredtiger_kv_engine.h:117
Definition: collection_options.h:57
Copyright (C) 2014 MongoDB Inc.
Definition: bson_collection_catalog_entry.cpp:38
WiredTigerFileVersion _fileVersion
Definition: wiredtiger_kv_engine.h:343
ElapsedTracker _sizeStorerSyncTracker
Definition: wiredtiger_kv_engine.h:323
std::list< std::string > _identToDrop
Definition: wiredtiger_kv_engine.h:337
StartupVersion
Definition: wiredtiger_kv_engine.h:58
stdx::mutex _dropAllQueuesMutex
Definition: wiredtiger_kv_engine.h:335
OperationContext Database StringData BSONObj CollectionOptions::ParseKind bool const BSONObj &idIndex Status
Definition: database_impl.cpp:956
bool _readOnly
Definition: wiredtiger_kv_engine.h:328
const bool _inRepairMode
Definition: wiredtiger_kv_engine.h:327
bool shouldDowngrade(bool readOnly, bool repairMode, bool hasRecoveryTimestamp)
Definition: wiredtiger_kv_engine.cpp:97
ClockSource *const _clockSource
Definition: wiredtiger_kv_engine.h:310
std::string _sizeStorerUri
Definition: wiredtiger_kv_engine.h:322
std::unique_ptr< WiredTigerJournalFlusher > _journalFlusher
Definition: wiredtiger_kv_engine.h:329
Date_t _previousCheckedDropsQueued
Definition: wiredtiger_kv_engine.h:339
std::unique_ptr< WiredTigerSizeStorer > _sizeStorer
Definition: wiredtiger_kv_engine.h:321
Timestamp _oldestTimestamp
Definition: wiredtiger_kv_engine.h:350
Definition: wiredtiger_kv_engine.h:65
virtual std::unique_ptr< RecordStore > getRecordStore(OperationContext *opCtx, StringData ns, StringData ident, const CollectionOptions &options)
Having multiple out for the same ns is a rules violation; Calling on a non-created ident is invalid a...
Definition: wiredtiger_kv_engine.h:104
This class allows for the storageEngine to alert the rest of the system about journaled write progres...
Definition: journal_listener.h:48
static const KVPrefix kNotPrefixed
Definition: kv_prefix.h:47
WiredTigerSessionCache * _sessionCache
Definition: wiredtiger_prefixed_index_test.cpp:101
std::unique_ptr< WiredTigerSessionCache > _sessionCache
Definition: wiredtiger_kv_engine.h:309
WT_CONNECTION * _conn
Definition: wiredtiger_kv_engine.h:307
Definition: wiredtiger_oplog_manager.h:47
std::string _path
Definition: wiredtiger_kv_engine.h:318
Definition: index_key_validate.h:40
std::string _indexOptions
Definition: wiredtiger_kv_engine.h:333
stdx::mutex _oldestTimestampMutex
Definition: wiredtiger_kv_engine.h:346
A RecoveryUnit is responsible for ensuring that data is persisted.
Definition: recovery_unit.h:51
bool _durable
Definition: wiredtiger_kv_engine.h:325
std::unique_ptr< WiredTigerOplogManager > _oplogManager
Definition: wiredtiger_kv_engine.h:315
stdx::mutex _identToDropMutex
Definition: wiredtiger_kv_engine.h:336
std::string _rsOptions
Definition: wiredtiger_kv_engine.h:332
An abstraction used for storing documents in a collection or entries in an index. ...
Definition: record_store.h:282
WT_EVENT_HANDLER _eventHandler
Definition: wiredtiger_kv_engine.h:308
WiredTigerOplogManager * getOplogManager() const
Definition: wiredtiger_kv_engine.h:243
OperationContext Database StringData BSONObj options
Definition: database_impl.cpp:949
std::string getDowngradeString()
Definition: wiredtiger_kv_engine.cpp:143
A KVPrefix may be prepended to the keys of entries in an underlying KV store.
Definition: kv_prefix.h:44
WT_CONNECTION * _conn
Definition: wiredtiger_prefixed_index_test.cpp:100
Timestamp _stableTimestamp
Definition: wiredtiger_kv_engine.h:351
Timestamp _recoveryTimestamp
Definition: wiredtiger_kv_engine.h:342
Manages snapshots that can be read from at a later time.
Definition: snapshot_manager.h:47
WiredTigerOplogManager _oplogManager
Definition: wiredtiger_prefixed_index_test.cpp:102
virtual Status createSortedDataInterface(OperationContext *opCtx, StringData ident, const IndexDescriptor *desc)
Definition: wiredtiger_kv_engine.h:111
Collection *const OperationContext *const opCtx
Definition: collection_impl.cpp:80
bool _ephemeral
Definition: wiredtiger_kv_engine.h:326
OperationContext const IndexDescriptor * desc
Definition: index_catalog_impl.cpp:97
static const int kDefaultJournalDelayMillis
Definition: wiredtiger_kv_engine.h:67
std::unique_ptr< WiredTigerSession > _backupSession
Definition: wiredtiger_kv_engine.h:341
Definition: wiredtiger_kv_engine.h:57
std::string uri
Definition: wiredtiger_standard_record_store_test.cpp:367
SnapshotManager * getSnapshotManager() const final
Return the SnapshotManager for this KVEngine or NULL if not supported.
Definition: wiredtiger_kv_engine.h:173
This interface is a work in progress.
Definition: sorted_data_interface.h:64