25 #ifndef MXNET_KVSTORE_H_ 26 #define MXNET_KVSTORE_H_ 30 #include <unordered_map> 34 #include "../../src/kvstore/gradient_compression.h" 36 #if MXNET_USE_DIST_KVSTORE 38 #endif // MXNET_USE_DIST_KVSTORE 74 static KVStore *Create(
const char *type =
"local");
/*!
 * \brief Return the kvstore type.
 * \return const reference to the stored type_ string (no copy is made).
 */
79 inline const std::string&
type() {
return type_; }
86 virtual void SetGradientCompression(
const std::vector<std::pair<std::string, std::string> >
/*!
 * \brief Initialization entry point for integer keys; pure virtual, so every
 *        concrete store must provide it.
 * \param keys integer keys
 * \param values NDArray values, passed by const reference
 * NOTE(review): presumably values[i] belongs to keys[i]; the required size
 * relationship is not visible in this declaration - confirm.
 */
105 virtual void Init(
const std::vector<int>& keys,
106 const std::vector<NDArray>& values) = 0;
/*!
 * \brief String-key overload of Init; pure virtual.
 * \param str_keys string keys
 * \param values NDArray values, passed by const reference
 * NOTE(review): presumably values[i] belongs to str_keys[i] - confirm.
 */
112 virtual void Init(
const std::vector<std::string>& str_keys,
113 const std::vector<NDArray>& values) = 0;
/*!
 * \brief Push entry point for integer keys; pure virtual.
 * \param keys integer keys
 * \param values NDArray values, passed by const reference
 * \param priority defaults to 0
 * NOTE(review): how priority orders operations is not visible here - confirm.
 */
150 virtual void Push(
const std::vector<int>& keys,
151 const std::vector<NDArray>& values,
152 int priority = 0) = 0;
/*!
 * \brief String-key overload of Push; pure virtual.
 * \param str_keys string keys
 * \param values NDArray values, passed by const reference
 * \param priority defaults to 0
 */
160 virtual void Push(
const std::vector<std::string>& str_keys,
161 const std::vector<NDArray>& values,
162 int priority = 0) = 0;
/*!
 * \brief Pull entry point for integer keys; pure virtual.
 * \param keys integer keys
 * \param values NDArray pointers - presumably the output buffers that receive
 *        the pulled data (TODO confirm)
 * \param priority defaults to 0
 * \param ignore_sparse defaults to true
 * NOTE(review): the exact effect of ignore_sparse is not visible here - confirm.
 */
187 virtual void Pull(
const std::vector<int>& keys,
188 const std::vector<NDArray*>& values,
189 int priority = 0,
bool ignore_sparse =
true) = 0;
/*!
 * \brief String-key overload of Pull; pure virtual.
 * \param str_keys string keys
 * \param values NDArray pointers - presumably the output buffers that receive
 *        the pulled data (TODO confirm)
 * \param priority defaults to 0
 * \param ignore_sparse defaults to true
 */
197 virtual void Pull(
const std::vector<std::string>& str_keys,
198 const std::vector<NDArray*>& values,
199 int priority = 0,
bool ignore_sparse =
true) = 0;
/*!
 * \brief Row-sparse pull entry point for integer keys; pure virtual.
 * \param keys integer keys (named `keys` to match the other integer-key
 *        overloads; the `str_keys` name is reserved for string-key overloads)
 * \param val_rowids (NDArray*, NDArray) pairs - presumably an output buffer
 *        paired with the row ids to fetch (TODO confirm)
 * \param priority defaults to 0
 */
209 virtual void PullRowSparse(
const std::vector<int>& keys,
210 const std::vector<std::pair<NDArray*, NDArray>>& val_rowids,
211 int priority = 0) = 0;
/*!
 * \brief String-key overload of PullRowSparse; pure virtual.
 * \param str_keys string keys
 * \param val_rowids (NDArray*, NDArray) pairs - presumably an output buffer
 *        paired with the row ids to fetch (TODO confirm)
 * \param priority defaults to 0
 */
221 virtual void PullRowSparse(
const std::vector<std::string>& str_keys,
222 const std::vector<std::pair<NDArray*, NDArray>>& val_rowids,
223 int priority = 0) = 0;
/*!
 * \brief The prototype of a user-defined updater: a callable taking an int,
 *        a const NDArray reference and an NDArray pointer, returning nothing.
 * NOTE(review): presumably (key, received value, stored value to update) - confirm.
 */
228 typedef std::function<void(int, const NDArray&, NDArray*)>
Updater;
/*!
 * \brief The prototype of a user-defined updater with string keys: same shape
 *        as Updater, but the first argument is a const std::string reference.
 */
232 typedef std::function<void(const std::string&, const NDArray&, NDArray*)>
StrUpdater;
243 CHECK(updater) <<
"invalid updater";
257 CHECK(updater) <<
"invalid updater";
258 str_updater_ = updater;
269 static void InitPSEnv(
const std::unordered_map<std::string, std::string>& envs) {
270 #if MXNET_USE_DIST_KVSTORE 271 ps::Environment::Init(envs);
273 LOG(FATAL) <<
"compile with USE_DIST_KVSTORE=1 to init parameter server's environment";
274 #endif // MXNET_USE_DIST_KVSTORE 283 #if MXNET_USE_DIST_KVSTORE 284 const char* role_str = ps::Environment::Get()->find(
"DMLC_ROLE");
285 return (role_str ==
nullptr) || (!strcmp(role_str,
"worker"));
288 #endif // MXNET_USE_DIST_KVSTORE 297 #if MXNET_USE_DIST_KVSTORE 298 const char* role_str = ps::Environment::Get()->find(
"DMLC_ROLE");
299 return (role_str !=
nullptr) && (!strcmp(role_str,
"server"));
302 #endif // MXNET_USE_DIST_KVSTORE 306 #if MXNET_USE_DIST_KVSTORE 307 if (!IsWorkerNode()) LOG(FATAL) <<
"barrier_before_exit takes effect only on worker nodes";
308 barrier_before_exit_ = barrier_before_exit;
310 LOG(FATAL) <<
"compile with USE_DIST_KVSTORE=1 to enable barrier";
320 #if MXNET_USE_DIST_KVSTORE 321 const char* role_str = ps::Environment::Get()->find(
"DMLC_ROLE");
322 return (role_str !=
nullptr) && (!strcmp(role_str,
"scheduler"));
325 #endif // MXNET_USE_DIST_KVSTORE 386 const std::string& params) {
387 LOG(INFO) <<
"Unable to pass server the profiler command. If you are using " 388 <<
"distributed kvstore, you need to compile with USE_DIST_KVSTORE=1." 389 <<
"If you are training on single machine, then there is no server process" 390 <<
"to profile. Please profile the worker process instead.";
/*!
 * \brief The prototype of a server controller: a callable taking an int and a
 *        const std::string reference, returning nothing.
 * NOTE(review): presumably (command id, command body), mirroring
 * SendCommandToServers(int cmd_id, const std::string& cmd_body) - confirm.
 */
396 typedef std::function<void(int, const std::string&)>
Controller;
/*!
 * \brief Run as server (or scheduler).
 *
 * The base-class body is empty, so calling it on a store that does not
 * override it does nothing.
 * \param controller the server controller callback (unused by this default body)
 */
411 virtual void RunServer(
const Controller& controller) { }
/*!
 * \brief Atomic flag, initialized to true; set_barrier_before_exit() assigns
 *        its argument to it when built with MXNET_USE_DIST_KVSTORE.
 * NOTE(review): presumably controls whether a barrier is issued before the
 * store exits - the reader of this flag is not visible here, confirm.
 */
438 std::atomic<bool> barrier_before_exit_{
true};
442 #endif // MXNET_KVSTORE_H_ distributed key-value store
Definition: kvstore.h:59
std::function< void(int, const NDArray &, NDArray *)> Updater
the prototype of user-defined updater
Definition: kvstore.h:228
namespace of mxnet
Definition: base.h:118
virtual int get_rank() const
Definition: kvstore.h:334
virtual void set_updater(const StrUpdater &updater)
set an updater with string keys
Definition: kvstore.h:256
Updater updater_
the user-defined updater
Definition: kvstore.h:417
const std::string & type()
return the type
Definition: kvstore.h:79
static bool IsSchedulerNode()
Definition: kvstore.h:319
virtual void Barrier()
global barrier among all worker machines
Definition: kvstore.h:364
static void InitPSEnv(const std::unordered_map< std::string, std::string > &envs)
initialize ps-lite environment variables
Definition: kvstore.h:269
static bool IsWorkerNode()
Definition: kvstore.h:282
virtual ~KVStore()
virtual destructor
Definition: kvstore.h:62
void set_barrier_before_exit(const bool barrier_before_exit)
Definition: kvstore.h:305
virtual void SetServerProfilerCommand(const KVStoreServerProfilerCommand type, const std::string &params)
Sends server profiler commands to all server nodes. Only the worker with rank=0 sends the command whic...
Definition: kvstore.h:385
StrUpdater str_updater_
the user-defined updater with string keys
Definition: kvstore.h:422
virtual int get_num_dead_node(int node_id, int timeout=60) const
Definition: kvstore.h:353
std::shared_ptr< kvstore::GradientCompression > gradient_compression_
Gradient compression object; starts with GC_NONE mode. Used if SetGradientCompression sets the type...
Definition: kvstore.h:433
virtual void RunServer(const Controller &controller)
Run as server (or scheduler)
Definition: kvstore.h:411
std::function< void(const std::string &, const NDArray &, NDArray *)> StrUpdater
the prototype of user-defined updater with string keys
Definition: kvstore.h:232
virtual void SendCommandToServers(int cmd_id, const std::string &cmd_body)
Send a command to all server nodes.
Definition: kvstore.h:377
std::string type_
the kvstore type
Definition: kvstore.h:427
std::function< void(int, const std::string &)> Controller
the prototype of a server controller
Definition: kvstore.h:396
virtual void set_updater(const Updater &updater)
set an updater
Definition: kvstore.h:242
virtual int get_group_size() const
Definition: kvstore.h:341
KVStoreServerProfilerCommand
enum to denote types of commands kvstore sends to server regarding profiler. kSetConfig sets profiler ...
Definition: kvstore.h:49
static bool IsServerNode()
Definition: kvstore.h:296