Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 10662c0

Browse files
committed
Renamed to be less confusing
1 parent 0b42683 commit 10662c0

File tree

7 files changed

+28
-27
lines changed

7 files changed

+28
-27
lines changed

‎core/runtime/TRTEngine.cpp‎

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -289,12 +289,12 @@ void TRTEngine::enable_profiling() {
289289
exec_ctx->setProfiler(trt_engine_profiler.get());
290290
}
291291

292-
void TRTEngine::set_unowned_output_tensor(bool enable) {
293-
this->unowned_output_tensor = enable;
292+
void TRTEngine::set_output_tensors_as_unowned(bool enable) {
293+
this->output_tensors_are_unowned = enable;
294294
}
295295

296-
bool TRTEngine::is_unowned_output_tensor() {
297-
return this->unowned_output_tensor;
296+
bool TRTEngine::are_output_tensors_unowned() {
297+
return this->output_tensors_are_unowned;
298298
}
299299

300300
void TRTEngine::set_profile_format(std::string format) {

‎core/runtime/TRTEngine.h‎

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ struct TRTEngine : torch::CustomClassHolder {
105105
std::pair<uint64_t, uint64_t> num_io;
106106
uint64_t io_size;
107107
std::map<std::string, bool> isShapeInferenceIO;
108-
bool unowned_output_tensor = false;
108+
bool output_tensors_are_unowned = false;
109109
std::string name;
110110
RTDevice device_info;
111111

@@ -162,8 +162,8 @@ struct TRTEngine : torch::CustomClassHolder {
162162
int64_t get_automatic_device_memory_budget();
163163
std::vector<at::Tensor> infer_outputs(std::vector<std::vector<int64_t>> input_shapes);
164164
void set_pre_allocated_outputs(bool enable);
165-
void set_unowned_output_tensor(bool enable);
166-
bool is_unowned_output_tensor();
165+
void set_output_tensors_as_unowned(bool enable);
166+
bool are_output_tensors_unowned();
167167
TorchTRTRuntimeStates runtime_states;
168168
friend std::ostream& operator<<(std::ostream& os, const TRTEngine& engine);
169169
static const char BINDING_DELIM = '%';

‎core/runtime/execute_engine.cpp‎

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,8 @@ std::vector<at::Tensor> execute_engine(std::vector<at::Tensor> inputs, c10::intr
249249
if (can_use_pre_allocated_outputs) {
250250
outputs = compiled_engine->pre_allocated_outputs;
251251
} else {
252-
if (compiled_engine->allocated_outputs.size() == 0 or compiled_engine->unowned_output_tensor or shape_changed) {
252+
if (compiled_engine->allocated_outputs.size() == 0 or compiled_engine->output_tensors_are_unowned or
253+
shape_changed) {
253254
compiled_engine->allocated_outputs = create_output_tensors(compiled_engine);
254255
new_outputs = true;
255256
}

‎core/runtime/register_jit_hooks.cpp‎

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,8 @@ static auto TORCHTRT_UNUSED TRTEngineTSRegistrtion =
9090
.def("get_engine_layer_info", &TRTEngine::get_engine_layer_info)
9191
.def("infer_outputs", &TRTEngine::infer_outputs)
9292
.def("reset_captured_graph", &TRTEngine::reset_captured_graph)
93-
.def("set_unowned_output_tensor", &TRTEngine::set_unowned_output_tensor)
94-
.def("is_unowned_output_tensor", &TRTEngine::is_unowned_output_tensor)
93+
.def("set_output_tensors_as_unowned", &TRTEngine::set_output_tensors_as_unowned)
94+
.def("are_output_tensors_unowned", &TRTEngine::are_output_tensors_unowned)
9595
.def_readwrite("use_pre_allocated_outputs", &TRTEngine::use_pre_allocated_outputs)
9696
.def_readwrite("use_output_allocator_outputs", &TRTEngine::use_output_allocator_outputs)
9797
.def_property(

‎py/torch_tensorrt/dynamo/_compiler.py‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1073,7 +1073,7 @@ def preserve_module_specs(
10731073

10741074
# Only set the requires_unique_output flag for the last TRT Module when user has access to the output tensor
10751075
if trt_module:
1076-
trt_module.set_unowned_output_tensor(True)
1076+
trt_module.set_output_tensors_as_unowned(True)
10771077

10781078
# Parse the graph I/O and store it in dryrun tracker
10791079
parse_graph_io(gm, dryrun_tracker)

‎py/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.py‎

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -221,28 +221,28 @@ def __init__(
221221
self.use_output_allocator_outputs = False
222222
self.device = torch.cuda.current_device()
223223
self.cudagraphs_enabled = torch_tensorrt.runtime.get_cudagraphs_mode()
224-
# If the output tensor is not owned by the engine (unowned_output_tensor=True), we need to create a new output tensor in each forward pass
225-
self.unowned_output_tensor = False
224+
# If the output tensor is not owned by the engine (output_tensors_are_unowned=True), we need to create a new output tensor in each forward pass
225+
self.output_tensors_are_unowned = False
226226
if self.serialized_engine is not None and not self.settings.lazy_engine_init:
227227
self.setup_engine()
228228
self.is_shape_inference_io = {
229229
input_name: self.engine.is_shape_inference_io(input_name)
230230
for input_name in self.input_names
231231
}
232232

233-
def set_unowned_output_tensor(self, enabled: bool) -> None:
233+
def set_output_tensors_as_unowned(self, enabled: bool) -> None:
234234
"""
235-
Set the flag to indicate if the output tensor is unowned by the engine.
236-
If self.unowned_output_tensor=True, the engine will create a new output tensor in each forward pass.
237-
This would be slower but is required when users need to manipulate the output tensor after each forward pass.
238-
Therefore, this should be set to True only for the last module in a graph and leave to False for intermediate modules,
239-
which users don't have access to.
235+
Flag to set if the output tensors of this engine are solely owned by the Torch-TensorRT Runtime or if they might be shared with a user.
236+
If the tensors are not owned by the runtime, then they must be recreated on every forward call which may have implications for performance.
237+
Typically only the final engine in a graph requires output tensors to be unowned and there are performance gains to be had for intermediate engines to manage their own standing memory.
238+
Therefore this should only be set to True for the final module in a graph and leave false for intermediate modules.
239+
240240
Args:
241241
enabled: bool
242242
Whether to set the flag to True.
243243
244244
"""
245-
self.unowned_output_tensor = enabled
245+
self.output_tensors_are_unowned = enabled
246246

247247
def get_streamable_device_memory_budget(self) -> Any:
248248
return self.engine.streamable_weights_size
@@ -528,7 +528,7 @@ def run_standard_execution() -> torch.Tensor | Tuple[torch.Tensor, ...]:
528528
)
529529
if (
530530
self.output_tensors is None
531-
or self.unowned_output_tensor
531+
or self.output_tensors_are_unowned
532532
or shape_changed
533533
):
534534
self.output_tensors = self.create_output_tensors()

‎py/torch_tensorrt/dynamo/runtime/_TorchTensorRTModule.py‎

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -289,8 +289,8 @@ def set_extra_state(self, state: SerializedTorchTensorRTModuleFmt) -> None:
289289
metadata = TorchTensorRTModule.decode_metadata(serialized_metadata)
290290
self.settings = metadata["settings"]
291291
self.weight_name_map = metadata["weight_name_map"]
292-
self.unowned_output_tensor = metadata["unowned_output_tensor"]
293-
self.engine.set_unowned_output_tensor(self.unowned_output_tensor)
292+
self.output_tensors_are_unowned = metadata["output_tensors_are_unowned"]
293+
self.engine.set_output_tensors_as_unowned(self.output_tensors_are_unowned)
294294

295295
else:
296296
self.engine = None
@@ -362,11 +362,11 @@ def enable_profiling(
362362
self.engine.enable_profiling()
363363
self.engine.set_profile_format(profile_format)
364364

365-
def set_unowned_output_tensor(self, enabled: bool) -> None:
366-
self.engine.set_unowned_output_tensor(enabled)
365+
def set_output_tensors_as_unowned(self, enabled: bool) -> None:
366+
self.engine.set_output_tensors_as_unowned(enabled)
367367

368-
def is_unowned_output_tensor(self) -> bool:
369-
return self.engine.is_unowned_output_tensor()  # type: ignore[no-any-return]
368+
def are_output_tensors_unowned(self) -> bool:
369+
return self.engine.are_output_tensors_unowned()  # type: ignore[no-any-return]
370370

371371
def disable_profiling(self) -> None:
372372
"""Disable the profiler"""

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp