Commit 6ddf5cf

desertfire authored and pytorchmergebot committed
[AOTI] Update cpp wrapper codegen to use v2 C shim (#120714)

Summary: To use the torchgen-ed v2 C shim interface, cpp wrapper codegen needs to update its rules for generating the right parameters and function calls. Because changing the emitted code will cause an FC (forward-compatibility) breakage, we add a flag to control the behavior.

Differential Revision: D54258086 (https://our.internmc.facebook.com/intern/diff/D54258086)
Pull Request resolved: #120714
Approved by: https://github.com/chenyang78
ghstack dependencies: #120513

1 parent bd19d6d, commit 6ddf5cf

File tree

4 files changed: +45 −8 lines changed

torch/_inductor/codegen/cpp_wrapper_cpu.py
Lines changed: 28 additions & 6 deletions
@@ -24,8 +24,9 @@ class CppWrapperCpu(WrapperCodeGen):
     """

     def __init__(self):
+        if not hasattr(self, "device"):
+            self.device = "cpu"
         super().__init__()
-
         self.declare = "auto "
         self.declare_maybe_reference = "decltype(auto) "
         self.ending = ";"
@@ -149,7 +150,12 @@ def write_header(self):
         )

         if config.abi_compatible:
-            self.header.splice("#include <torch/csrc/inductor/aoti_torch/c/shim.h>")
+            if config.c_shim_version == "1":
+                self.header.splice("#include <torch/csrc/inductor/aoti_torch/c/shim.h>")
+            else:
+                self.header.splice(
+                    f"#include <torch/csrc/inductor/aoti_torch/generated/c_shim_{self.device}.h>"
+                )
         else:
             if not V.graph.aot_mode:
                 self.header.splice("#include <pybind11/pybind11.h>")
@@ -924,7 +930,11 @@ def generate_c_shim_extern_kernel_call(self, kernel, args):
         kernel_suffix = kernel_tokens[-1]
         if kernel_suffix == "call":
             kernel_suffix = kernel_tokens[-2]
-        shim_fn = f"aoti_torch_{kernel_suffix}"
+        if config.c_shim_version == "1":
+            shim_fn = f"aoti_torch_{kernel_suffix}"
+        else:
+            shim_fn = f"aoti_torch_{self.device}_{kernel_suffix}"
+
         # HACK: val_to_arg_str jams multiple arguments together using a comma. If that
         # ever breaks, it needs to be reworked to be able to return multiple arguments,
         # and the split-on-comma code here needs to be removed.
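
A minimal sketch of the shim-name rule above, assuming an illustrative kernel suffix "addmm" (any aten op suffix would do; it is not taken from this commit):

# Hypothetical restatement of the shim_fn naming logic above.
def shim_fn_name(device: str, c_shim_version: str, kernel_suffix: str) -> str:
    if c_shim_version == "1":
        return f"aoti_torch_{kernel_suffix}"         # v1: device-agnostic name
    return f"aoti_torch_{device}_{kernel_suffix}"    # v2: per-device name

print(shim_fn_name("cpu", "1", "addmm"))   # -> aoti_torch_addmm
print(shim_fn_name("cpu", "2", "addmm"))   # -> aoti_torch_cpu_addmm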
@@ -1676,12 +1686,24 @@ def val_to_cpp_arg_str(self, type_, val, is_legacy_abi) -> str:
         ):
             if val is None:
                 return "0"  # nullptr is not available in C
-            if isinstance(val, (bool, int, str, float)):
+            if not isinstance(type_.getElementType(), torch.TensorType):
                 var_name = f"var_{next(self.arg_var_id)}"
                 self.writeline(f"auto {var_name} = {self.val_to_arg_str(val)};")
                 return f"&{var_name}"
-            if not isinstance(type_.getElementType(), torch.TensorType):
-                return f"&{self.val_to_arg_str(val)}"
+            elif config.c_shim_version == "2":
+                # Similar to other data type, use pointer to denote optional tensor arg in v2 C shim
+                base_handle = self.val_to_arg_str(val)
+                if "wrap_with_raii_handle_if_needed" in base_handle:
+                    # wrap_with_raii_handle_if_needed creates a temp RAIIAtenTensorHandle, so we need to
+                    # explicitly store it. Otherwise, it will be destroyed before the fallback kernel call.
+                    tmp_var_name = f"var_{next(self.arg_var_id)}"
+                    self.writeline(
+                        f"RAIIAtenTensorHandle {tmp_var_name} = {base_handle};"
+                    )
+                    base_handle = tmp_var_name
+                var_name = f"var_{next(self.arg_var_id)}"
+                self.writeline(f"AtenTensorHandle {var_name} = {base_handle}.get();")
+                return f"&{var_name}"

         return self.val_to_arg_str(val)
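
A standalone sketch of the v2 optional-tensor path above: the optional tensor is passed by pointer, and any temporary RAIIAtenTensorHandle is first pinned to a named variable so it outlives the fallback kernel call. The handle name "buf0" and the helper below are hypothetical, invented only to illustrate the emitted C++ lines.

# Hypothetical mirror of the optional-tensor argument handling above.
from itertools import count

arg_var_id = count()
emitted_cpp_lines = []  # stands in for self.writeline output

def optional_tensor_arg(base_handle: str) -> str:
    if "wrap_with_raii_handle_if_needed" in base_handle:
        # Pin the temp RAII handle so it is not destroyed before the call.
        tmp = f"var_{next(arg_var_id)}"
        emitted_cpp_lines.append(f"RAIIAtenTensorHandle {tmp} = {base_handle};")
        base_handle = tmp
    var = f"var_{next(arg_var_id)}"
    emitted_cpp_lines.append(f"AtenTensorHandle {var} = {base_handle}.get();")
    return f"&{var}"  # pointer denotes the optional arg is present

print(optional_tensor_arg("buf0"))  # -> &var_0
print(emitted_cpp_lines)            # ['AtenTensorHandle var_0 = buf0.get();']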

torch/_inductor/codegen/cpp_wrapper_cuda.py
Lines changed: 1 addition & 0 deletions
@@ -43,6 +43,7 @@ class CppWrapperCuda(CppWrapperCpu):
     """

     def __init__(self):
+        self.device = "cuda"
         super().__init__()
         self.grid_id = count()
         self.cuda = True
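
Note the ordering: the CUDA subclass sets self.device before calling super().__init__(), so the hasattr guard added to CppWrapperCpu.__init__ only defaults to "cpu" when no subclass has claimed a device. A minimal sketch of that pattern (Base and Cuda are illustrative stand-ins for the two wrapper classes):

class Base:
    def __init__(self):
        # Default only when no subclass has already set a device.
        if not hasattr(self, "device"):
            self.device = "cpu"

class Cuda(Base):
    def __init__(self):
        self.device = "cuda"  # must run before super().__init__()
        super().__init__()

assert Base().device == "cpu"
assert Cuda().device == "cuda"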

torch/_inductor/config.py
Lines changed: 4 additions & 0 deletions
@@ -41,6 +41,10 @@ def enable_autotune_remote_cache():
     os.environ.get("TORCHINDUCTOR_ABI_COMPATIBLE", "1" if is_fbcode() else "0") == "1"
 )

+c_shim_version = os.environ.get(
+    "TORCHINDUCTOR_C_SHIM_VERSION", "1" if is_fbcode() else "2"
+)
+
 # dead code elimination
 dce = False
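
This is the FC-control flag from the summary: fbcode defaults to the v1 shim, OSS to v2. One possible way to pin the version explicitly, given that the env var is read once when the config module is imported:

import os
os.environ["TORCHINDUCTOR_C_SHIM_VERSION"] = "1"  # keep the v1 C shim

# Must happen after the env var is set, since config.py reads it at import time.
import torch._inductor.config as inductor_config
print(inductor_config.c_shim_version)  # -> "1"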

torch/_inductor/ir.py
Lines changed: 12 additions & 2 deletions
@@ -4855,7 +4855,10 @@ def is_not_write(arg):
         self.init_args_default_value(kernel._schema)

     def is_legacy_abi_kernel(self):
-        return "_scaled_dot_product_flash_attention" in str(self.python_kernel_name)
+        return (
+            config.c_shim_version == "1"
+            and "_scaled_dot_product_flash_attention" in str(self.python_kernel_name)
+        )

     def init_args_default_value(self, schema):
         self.args_default_value = [
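
The behavioral effect of this change, restated as a standalone predicate (a sketch, not the method itself): the legacy-ABI special case for _scaled_dot_product_flash_attention now applies only under the v1 shim.

def is_legacy_abi_kernel(python_kernel_name: str, c_shim_version: str) -> bool:
    return (
        c_shim_version == "1"
        and "_scaled_dot_product_flash_attention" in str(python_kernel_name)
    )

assert is_legacy_abi_kernel("aten._scaled_dot_product_flash_attention", "1")
assert not is_legacy_abi_kernel("aten._scaled_dot_product_flash_attention", "2")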
@@ -4908,6 +4911,7 @@ def __repr__(self):
         self.abi_compatible_kernel = (
             f"{self.cpp_kernel_name}_v2"
             if self.cpp_kernel_name in {"at::_scaled_dot_product_flash_attention"}
+            and config.c_shim_version == "1"
             else self.cpp_kernel_name
         )
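
Likewise, the hand-written "_v2" shim variant of that kernel name is now selected only under the v1 shim. A hypothetical restatement of the expression above as a free function:

def abi_compatible_kernel(cpp_kernel_name: str, c_shim_version: str) -> str:
    return (
        f"{cpp_kernel_name}_v2"
        if cpp_kernel_name in {"at::_scaled_dot_product_flash_attention"}
        and c_shim_version == "1"
        else cpp_kernel_name
    )

print(abi_compatible_kernel("at::_scaled_dot_product_flash_attention", "2"))
# -> at::_scaled_dot_product_flash_attention (no "_v2" suffix under v2)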

@@ -5065,7 +5069,13 @@ def codegen(self, wrapper):
         # Aten Fallback Ops
         assert isinstance(kernel, torch._ops.OpOverload)
         if V.graph.cpp_wrapper:
-            if config.is_fbcode() and kernel not in has_c_shim:
+            if (
+                config.is_fbcode()
+                and kernel not in has_c_shim
+                # C shim v2 is torchgen-ed, which should cover all aten ops.
+                # If you do hit a missed op, please update gen_aoti_c_shim.py.
+                and config.c_shim_version == "1"
+            ):
                 log.warning(
                     "%s is missing a c-shim implementation, using proxy executor as fallback",
                     kernel,
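
A hypothetical restatement of the guard above: the missing-shim warning (and the proxy-executor fallback it announces) is now limited to v1, since the torchgen-ed v2 shim should cover all aten ops.

def warn_missing_c_shim(is_fbcode: bool, has_shim: bool, c_shim_version: str) -> bool:
    return is_fbcode and not has_shim and c_shim_version == "1"

assert warn_missing_c_shim(True, False, "1")       # v1: warn and fall back
assert not warn_missing_c_shim(True, False, "2")   # v2: shim is expected to exist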

