Commit 3c97d9b

mzient authored and stiepan committed
Add metadata-only inputs. (#5635)
* Assign a stream to CPU operators with GPU (non-metadata) inputs
* Add Metadata input device - this declares that the input is used for metadata (shape, dtype, etc.) access only
* Don't synchronize metadata inputs in the executor.
* Don't prolong the lifetime of metadata-only inputs.
* Add InputDevice specifier to the random number generators' shape_like input.

---------

Signed-off-by: Michał Zientkiewicz <mzient@gmail.com>
1 parent 57716cb · commit 3c97d9b

15 files changed: +170 −34 lines
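Every schema change below follows the same pattern: the operator declares, per input index, that a given input is consumed only for its metadata (shape, dtype). A minimal sketch of that declaration, assuming a hypothetical operator name ExampleShapeLikeOp (the builder calls mirror the ones used in the diffs that follow):

DALI_SCHEMA(ExampleShapeLikeOp)  // hypothetical name, for illustration only
    .DocStr("Uses the second input only to infer the shape and type of the output.")
    .NumInput(2)
    .InputDox(1, "shape_like", "TensorList",
              "Shape of this input will be used to infer the shape of the output.")
    .InputDevice(1, InputDevice::Metadata)  // metadata-only: no stream sync, no lifetime extension
    .NumOutput(1);

With such a declaration, the executor neither waits on the producer's stream for that input nor prolongs the lifetime of its data, as implemented in the executor2 changes further down.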

‎dali/operators/generic/cast.cc

Lines changed: 1 addition & 1 deletion

@@ -76,7 +76,7 @@ DALI_SCHEMA(Cast)
 DALI_SCHEMA(CastLike)
     .DocStr("Cast the first tensor to the type of the second tensor.")
     .NumInput(2)
-    .InputDevice(1, InputDevice::Any)
+    .InputDevice(1, InputDevice::Metadata)
     .NumOutput(1)
     .AllowSequences()
     .SupportVolumetric();

‎dali/operators/generic/constant_value.cc

Lines changed: 3 additions & 3 deletions

@@ -84,7 +84,7 @@ DALI_SCHEMA(FullLike)
     .DocStr(R"code(Returns new data with the same shape and type as the input data, filled with a `fill_value`.)code")
     .NumInput(2)
     .InputDox(0, "data_like", "TensorList", R"code(The input data value to copy the shape and type from.)code")
-    .InputDevice(0, InputDevice::Any)
+    .InputDevice(0, InputDevice::Metadata)
     .InputDox(1, "fill_value", "TensorList", R"code(The fill value.)code")
     .NumOutput(1);
 DALI_REGISTER_OPERATOR(FullLike, FullLike<CPUBackend>, CPU);
@@ -102,7 +102,7 @@ DALI_SCHEMA(ZerosLike)
     .DocStr(R"code(Returns new data with the same shape and type as the input array, filled with zeros.)code")
     .NumInput(1)
     .InputDox(0, "data_like", "TensorList", R"code(The input data value to copy the shape and type from.)code")
-    .InputDevice(0, InputDevice::Any)
+    .InputDevice(0, InputDevice::Metadata)
     .NumOutput(1)
     .AddOptionalTypeArg("dtype", R"code(Overrides the output data type.)code", DALI_INT32);
 DALI_REGISTER_OPERATOR(ZerosLike, ZerosLike<CPUBackend>, CPU);
@@ -120,7 +120,7 @@ DALI_SCHEMA(OnesLike)
     .DocStr(R"code(Returns new data with the same shape and type as the input array, filled with ones.)code")
     .NumInput(1)
     .InputDox(0, "data_like", "TensorList", R"code(The input data value to copy the shape and type from.)code")
-    .InputDevice(0, InputDevice::Any)
+    .InputDevice(0, InputDevice::Metadata)
     .NumOutput(1)
     .AddOptionalTypeArg("dtype", R"code(Overrides the output data type.)code", DALI_INT32);
 DALI_REGISTER_OPERATOR(OnesLike, OnesLike<CPUBackend>, CPU);

‎dali/operators/generic/shapes.cc

Lines changed: 1 addition & 1 deletion

@@ -19,7 +19,7 @@ namespace dali {
 DALI_SCHEMA(Shapes)
     .DocStr(R"code(Returns the shapes of inputs.)code")
     .NumInput(1)
-    .InputDevice(0, InputDevice::Any)
+    .InputDevice(0, InputDevice::Metadata)
     .NumOutput(1)
     .AllowSequences()
     .SupportVolumetric()

‎dali/operators/random/beta_distribution_cpu.cc

Lines changed: 1 addition & 0 deletions

@@ -38,6 +38,7 @@ a single value per sample is generated.
     .NumInput(0, 1)
     .InputDox(0, "shape_like", "TensorList",
               "Shape of this input will be used to infer the shape of the output, if provided.")
+    .InputDevice(0, InputDevice::Metadata)
     .NumOutput(1)
     .AddOptionalArg("alpha", R"code(The alpha parameter, a positive ``float32`` scalar.)code", 1.0f,
                     true)

‎dali/operators/random/choice_cpu.cc

Lines changed: 1 addition & 0 deletions

@@ -42,6 +42,7 @@ that is: :meth:`nvidia.dali.types.DALIDataType`, :meth:`nvidia.dali.types.DALIIm
               "Otherwise ``__a`` is treated as 1D array of input samples.")
     .InputDox(1, "shape_like", "TensorList",
               "Shape of this input will be used to infer the shape of the output, if provided.")
+    .InputDevice(1, InputDevice::Metadata)
     .NumOutput(1)
     .AddOptionalArg<std::vector<float>>("p",
         "Distribution of the probabilities."

‎dali/operators/random/coin_flip_cpu.cc

Lines changed: 2 additions & 0 deletions

@@ -30,6 +30,7 @@ a single value per sample is generated.
     .NumInput(0, 1)
     .InputDox(0, "shape_like", "TensorList",
               "Shape of this input will be used to infer the shape of the output, if provided.")
+    .InputDevice(0, InputDevice::Metadata)
     .NumOutput(1)
     .AddOptionalArg<float>("probability",
         R"code(Probability of value 1.)code",
@@ -51,6 +52,7 @@ sample is generated.
     .NumInput(0, 1)
     .InputDox(0, "shape_like", "TensorList",
               "Shape of this input will be used to infer the shape of the output, if provided.")
+    .InputDevice(0, InputDevice::Metadata)
     .NumOutput(1)
     .AddParent("random__CoinFlip")
     .Deprecate("random__CoinFlip");  // Deprecated in 0.30

‎dali/operators/random/normal_distribution_cpu.cc

Lines changed: 2 additions & 2 deletions

@@ -28,7 +28,7 @@ a single value per sample is generated.
     .NumInput(0, 1)
     .InputDox(0, "shape_like", "TensorList",
               "Shape of this input will be used to infer the shape of the output, if provided.")
-    .InputDevice(0, InputDevice::Any)
+    .InputDevice(0, InputDevice::Metadata)
     .NumOutput(1)
     .AddOptionalArg<float>("mean",
         R"code(Mean of the distribution.)code",
@@ -51,7 +51,7 @@ a single value per sample is generated.
     .NumInput(0, 1)
     .InputDox(0, "shape_like", "TensorList",
               "Shape of this input will be used to infer the shape of the output, if provided.")
-    .InputDevice(0, InputDevice::Any)
+    .InputDevice(0, InputDevice::Metadata)
     .NumOutput(1)
     .AddParent("random__Normal")
     .Deprecate("random__Normal");  // Deprecated in 0.30

‎dali/operators/random/uniform_distribution_cpu.cc

Lines changed: 2 additions & 0 deletions

@@ -32,6 +32,7 @@ a single value per sample is generated.
     .NumInput(0, 1)
     .InputDox(0, "shape_like", "TensorList",
               "Shape of this input will be used to infer the shape of the output, if provided.")
+    .InputDevice(0, InputDevice::Metadata)
     .NumOutput(1)
     .AddOptionalArg("range",
         R"code(Range ``[min, max)`` of a continuous uniform distribution.
@@ -67,6 +68,7 @@ a single value per sample is generated.
     .NumInput(0, 1)
     .InputDox(0, "shape_like", "TensorList",
               "Shape of this input will be used to infer the shape of the output, if provided.")
+    .InputDevice(0, InputDevice::Metadata)
     .NumOutput(1)
     .AddOptionalArg("range",
         R"code(Range ``[min, max)`` of a continuous uniform distribution.

‎dali/pipeline/executor/executor2/exec_graph.h

Lines changed: 4 additions & 1 deletion

@@ -90,13 +90,16 @@ struct ExecEdge {
   /** The index of the input in OpSpec. It matches the edge's index in consumer->inputs. */
   int consumer_input_idx = 0;
   StorageDevice device = {};
+  /** The input passes only the metadata, skipping stream synchronization. */
+  bool metadata = false;
 
   constexpr bool operator==(const ExecEdge &other) const {
     return producer == other.producer &&
            consumer == other.consumer &&
            producer_output_idx == other.producer_output_idx &&
            consumer_input_idx == other.consumer_input_idx &&
-           device == other.device;
+           device == other.device &&
+           metadata == other.metadata;
   }
 
   constexpr bool operator!=(const ExecEdge &other) const {

‎dali/pipeline/executor/executor2/exec_graph_lowering.cc

Lines changed: 10 additions & 1 deletion

@@ -50,7 +50,16 @@ void ExecGraph::Lower(const graph::OpGraph &def) {
       for (auto &consumer : out->consumers) {
         auto *exec_con = def2exec[consumer.op];
         assert(exec_con != nullptr);
-        Link(&exec_node, o, exec_con, consumer.idx)->device = dev;
+        auto *edge = Link(&exec_node, o, exec_con, consumer.idx);
+        edge->device = dev;
+        if (consumer.op) {
+          auto &consumer_spec = consumer.op->spec;
+          auto &schema = consumer_spec.GetSchemaOrDefault();
+          if (edge->consumer_input_idx < schema.MaxNumInput()) {  // only regular inputs
+            if (schema.GetInputDevice(edge->consumer_input_idx) == InputDevice::Metadata)
+              edge->metadata = true;
+          }
+        }
       }
       exec_node.outputs[o].device = dev;
     }

‎dali/pipeline/executor/executor2/exec_node_task.cc

Lines changed: 11 additions & 12 deletions

@@ -278,16 +278,13 @@ void OpTask::SetWorkspaceInputs() {
   auto process_input = [&](int i, auto backend) {
     using Backend = decltype(backend);
     const auto &inp = TaskInput<Backend>(ti);
-    // If the output order of the operator is `host` then we don't wait for GPU
-    // inputs - they can't be accessed directly on host and the operator will
-    // have to issue some sort of synchronization if and when necessary.
-    // This optimization is essential to avoid oversynchronization
-    // when the operator needs to access the metadata only (e.g. getting the shape).
-    if ((order.is_device() || std::is_same_v<Backend, CPUBackend>) /*see comment above*/ &&
-        inp.event && inp.order != order)
+    bool is_meta = node_->inputs[i]->metadata;
+    // metadata-only inputs don't need to be synchronized
+    if (!is_meta && inp.event && inp.order != order)
       events.insert(inp.event);
 
-    if (inp.order == order) {  // use the input directly
+    // metadata-only inputs don't need a proper stream
+    if (inp.order == order || is_meta) {  // use the input directly
       ws_->SetInput(i, inp.data);
     } else {  // create another TL and set its order (and layout, while we're at it)
       auto tl = std::make_shared<TensorList<Backend>>();
@@ -477,7 +474,7 @@ tasking::SharedTask ExecNodeTask::CreateTask(ExecNode *node, const WorkspacePara
   }
 }
 
-void ClearWorkspacePayload(Workspace &ws) {
+void ClearWorkspacePayload(Workspace &ws, ExecNode &node) {
   auto event = ws.has_event() ? ws.event() : nullptr;
   for (int i = 0; i < ws.NumInput(); i++) {
     // TODO(michalz): Some smarter deletion management
@@ -492,14 +489,16 @@ void ClearWorkspacePayload(Workspace &ws) {
     if (ws.InputIsType<CPUBackend>(i)) {
      if (auto &pinp = ws.InputPtr<CPUBackend>(i)) {
         auto &inp = *pinp;
-        if (inp.is_pinned() && event && inp.order() != ws.output_order())
+        if (event &&
+            !node.inputs[i]->metadata &&
+            inp.is_pinned() && inp.order() != ws.output_order())
           inp.order().wait(event);
         ws.SetInput<CPUBackend>(i, nullptr);
       }
     } else if (ws.InputIsType<GPUBackend>(i)) {
       if (auto &pinp = ws.InputPtr<GPUBackend>(i)) {
         auto &inp = *pinp;
-        if (event && inp.order() != ws.output_order())
+        if (event && !node.inputs[i]->metadata && inp.order() != ws.output_order())
           inp.order().wait(event);
         ws.SetInput<GPUBackend>(i, nullptr);
       }
@@ -525,7 +524,7 @@ void ClearWorkspacePayload(Workspace &ws) {
 
 void ExecNodeTask::ClearWorkspace() {
   assert(ws_);
-  ClearWorkspacePayload(*ws_);
+  ClearWorkspacePayload(*ws_, *node_);
 }
 
 }  // namespace exec2

‎dali/pipeline/executor/executor2/stream_assignment.h

Lines changed: 15 additions & 6 deletions

@@ -35,15 +35,15 @@ template <StreamPolicy policy>
 class StreamAssignment;
 
 inline bool NeedsStream(const ExecNode *node) {
-  if (node->is_pipeline_output) {
-    for (auto &pipe_out : node->inputs) {
-      if (pipe_out->device == StorageDevice::GPU)
+  if (node->is_pipeline_output || node->backend == OpType::CPU) {
+    for (auto &input : node->inputs) {
+      if (input->device == StorageDevice::GPU && !input->metadata)
         return true;
     }
+    return false;
   } else {
-    return node->backend != OpType::CPU;
+    return true;
  }
-  return false;
 }
 
 inline OpType NodeType(const ExecNode *node) {
@@ -117,6 +117,12 @@ class StreamAssignment<StreamPolicy::PerBackend> {
         if (has_gpu_)
           return;  // we already have both, nothing more can happen
         break;
+      case OpType::CPU:
+        if (NeedsStream(&node)) {  // treat CPU nodes with GPU inputs as GPU
+          has_gpu_ = true;
+          if (has_mixed_)
+            return;
+        }
       default:
         break;
       }
@@ -128,11 +134,14 @@
    * If the node is a Mixed node, it gets stream index 0.
    * If the node is a GPU node it gets stream index 1 if there are any mixed nodes, otherwise
    * the only stream is the GPU stream and the returned index is 0.
+   * CPU nodes get GPU stream if they need one (i.e. they have a GPU input)
    */
   std::optional<int> operator[](const ExecNode *node) const {
     switch (NodeType(node)) {
      case OpType::CPU:
-        return std::nullopt;
+        if (!NeedsStream(node))
+          return std::nullopt;
+        // fall-through to GPU
      case OpType::GPU:
        return has_mixed_ ? 1 : 0;
      case OpType::MIXED:

‎dali/pipeline/executor/executor2/stream_assignment_test.cc

Lines changed: 88 additions & 3 deletions

@@ -48,6 +48,31 @@ DALI_REGISTER_OPERATOR(StreamAssignmentDummyOp, StreamAssignmentDummyOp<CPUBacke
 DALI_REGISTER_OPERATOR(StreamAssignmentDummyOp, StreamAssignmentDummyOp<MixedBackend>, Mixed);
 DALI_REGISTER_OPERATOR(StreamAssignmentDummyOp, StreamAssignmentDummyOp<GPUBackend>, GPU);
 
+
+template <typename Backend>
+class StreamAssignmentMetaOp : public Operator<Backend> {
+ public:
+  using Operator<Backend>::Operator;
+  USE_OPERATOR_MEMBERS();
+
+  void RunImpl(Workspace &ws) override {}
+  bool SetupImpl(std::vector<OutputDesc> &output_desc, const Workspace &ws) override {
+    return false;
+  }
+};
+
+DALI_SCHEMA(StreamAssignmentMetaOp)
+    .NumInput(0, 999)
+    .InputDevice(0, 999, InputDevice::Metadata)
+    .NumOutput(0)
+    .AdditionalOutputsFn([](const OpSpec &spec) {
+      return spec.NumOutput();
+    });
+
+DALI_REGISTER_OPERATOR(StreamAssignmentMetaOp, StreamAssignmentMetaOp<CPUBackend>, CPU);
+DALI_REGISTER_OPERATOR(StreamAssignmentMetaOp, StreamAssignmentMetaOp<MixedBackend>, Mixed);
+DALI_REGISTER_OPERATOR(StreamAssignmentMetaOp, StreamAssignmentMetaOp<GPUBackend>, GPU);
+
 namespace exec2 {
 
 namespace {
@@ -71,6 +96,27 @@ OpSpec SpecMixed() {
   return SpecDev("mixed");
 }
 
+
+OpSpec SpecMetaDev(const std::string &device) {
+  return OpSpec("StreamAssignmentMetaOp")
+      .AddArg("device", device)
+      .AddArg("num_threads", 1)
+      .AddArg("max_batch_size", 1);
+}
+
+OpSpec SpecMetaGPU() {
+  return SpecMetaDev("gpu");
+}
+
+OpSpec SpecMetaCPU() {
+  return SpecMetaDev("cpu");
+}
+
+OpSpec SpecMetaMixed() {
+  return SpecMetaDev("mixed");
+}
+
+
 auto MakeNodeMap(const ExecGraph &graph) {
   std::map<std::string_view, const ExecNode *, std::less<>> map;
   for (auto &n : graph.Nodes())
@@ -122,6 +168,38 @@ TEST(Exec2Test, StreamAssignment_Single_CPUMixedGPU) {
   EXPECT_EQ(assignment[map["c"]], 0);
 }
 
+template <StreamPolicy policy>
+void TestGPU2CPUAssignment() {
+  graph::OpGraph::Builder b;
+  b.Add("a",
+        SpecGPU()
+          .AddOutput("a->b", "gpu")
+          .AddOutput("a->c", "gpu"));
+  b.Add("b",
+        SpecCPU()
+          .AddInput("a->b", "gpu")
+          .AddOutput("b->out", "cpu"));
+  b.Add("c",
+        SpecMetaCPU()
+          .AddInput("a->c", "gpu")
+          .AddOutput("c->out", "cpu"));
+  b.AddOutput("b->out_cpu");
+  b.AddOutput("c->out_cpu");
+  auto g = std::move(b).GetGraph(true);
+  ExecGraph eg;
+  eg.Lower(g);
+
+  StreamAssignment<policy> assignment(eg);
+  auto map = MakeNodeMap(eg);
+  EXPECT_EQ(assignment[map["a"]], 0);
+  EXPECT_EQ(assignment[map["b"]], 0);  // CPU operator with GPU input
+  EXPECT_EQ(assignment[map["c"]], std::nullopt);  // metadata-only
+}
+
+TEST(Exec2Test, StreamAssignment_Single_GPU2CPU) {
+  TestGPU2CPUAssignment<StreamPolicy::Single>();
+}
+
 
 TEST(Exec2Test, StreamAssignment_PerBackend_OnlyCPU) {
   graph::OpGraph::Builder b;
@@ -194,6 +272,13 @@ TEST(Exec2Test, StreamAssignment_PerBackend_CPUMixedGPU) {
 }
 
 
+TEST(Exec2Test, StreamAssignment_PerBackend_GPU2CPU) {
+  TestGPU2CPUAssignment<StreamPolicy::PerBackend>();
+}
+
+TEST(Exec2Test, StreamAssignment_OperOperator_GPU2CPU) {
+  TestGPU2CPUAssignment<StreamPolicy::PerOperator>();
+}
 
 TEST(Exec2Test, StreamAssignment_PerOperator_1) {
   ExecGraph eg;
@@ -272,7 +357,7 @@ TEST(Exec2Test, StreamAssignment_PerOperator_2) {
         SpecGPU()
          .AddOutput("i->j", "gpu"));
   b.Add("j",
-        SpecCPU()
+        SpecMetaCPU()
          .AddInput("i->j", "gpu")
          .AddOutput("j->h", "cpu"));
   b.Add("b",
@@ -320,15 +405,15 @@ TEST(Exec2Test, StreamAssignment_PerOperator_2) {
   StreamAssignment<StreamPolicy::PerOperator> assignment(eg);
   auto map = MakeNodeMap(eg);
   EXPECT_EQ(assignment[map["a"]], 0);
-  EXPECT_EQ(assignment[map["b"]], std::nullopt);
+  EXPECT_EQ(assignment[map["b"]], 0);  // CPU operator with a GPU input needs a stream
   EXPECT_EQ(assignment[map["c"]], 0);
   EXPECT_EQ(assignment[map["d"]], 0);
   EXPECT_EQ(assignment[map["e"]], 1);
   EXPECT_EQ(assignment[map["f"]], 0);
   EXPECT_EQ(assignment[map["g"]], 0);
   EXPECT_EQ(assignment[map["h"]], 0);
   EXPECT_EQ(assignment[map["i"]], 2);
-  EXPECT_EQ(assignment[map["j"]], std::nullopt);
+  EXPECT_EQ(assignment[map["j"]], std::nullopt);  // metadata only
   EXPECT_EQ(assignment[map["k"]], 3);
 }
 
