|
| 1 | +## tool learning 数据集评测教程
| 2 | + |
| 3 | +### chatml 接入方式
| 4 | +如果需要在自己的 huggingface 格式的模型上进行测试的话,总的步骤分为如下几步: |
| 5 | +1. 编写~/evals/FuncCallEvalution 的 create_prompts 函数 |
| 6 | +2. 编写~/models/base_model 的 相关函数 |
| 7 | +3. 注册模型和评估函数 |
| 8 | +4. 执行测试脚本 |
| 9 | +如果模型在加载进来后不需要特殊的处理,而且输入也不需要转换为特定的格式(e.g. chatml 格式或者其他的 human-bot 格式),请直接跳转到第四步直接发起测试。 |
| 10 | + |
| 11 | +#### 1. 编写 FuncCallEvalution 的 create_prompts 函数
| 12 | +如果需要自定义评测数据到 prompt 的拼接方式,需要去`~/evals/FuncCallEvalution` 中继承`ToolEvalution` 类来覆写对应的`create_prompts` 函数,具体可以参照以下示例:
| 13 | +```python |
| 14 | +class FuncCallEvalution(ToolEvalution):
| 15 | +
| 16 | +    def create_prompts(self, func_call_datas):
| 17 | +        '''
| 18 | +        datas: [
| 19 | +            {
| 20 | +                "instruction": history[his_idx],
| 21 | +                "input": "",
| 22 | +                "output": output,
| 23 | +                "history": [(human_content, ai_content), (), ()],
| 24 | +                "functions": tools
| 25 | +            }
| 26 | +        ]
| 27 | +        '''
| 28 | +        system_content = '''CodeFuse是一个面向研发领域的智能助手,旨在中立的、无害的帮助用户解决开发相关的问题,所有的回答均使用Markdown格式返回。
| 29 | +        你能利用许多工具和功能来完成给定的任务,在每一步中,你需要分析当前状态,并通过执行函数调用来确定下一步的行动方向。你可以进行多次尝试。如果你计划连续尝试不同的条件,请每次尝试一种条件。若给定了Finish函数,则以Finish调用结束,若没提供Finish函数,则以不带function_call的对话结束。'''
| 30 | +        function_format = '''You are ToolGPT, you have access to the following APIs:\n{tools}'''
| 31 | +
| 32 | +        func_call_train_datas = []
| 33 | +        history_error_cnt = 0
| 34 | +        funccall_error_cnt = 0
| 35 | +
| 36 | +        for data in func_call_datas:
| 37 | +            tools = data["functions"]
| 38 | +            chatrounds = data["chatrounds"]
| 39 | +
| 40 | +            function_content = ""
| 41 | +            if len(tools) > 0:
| 42 | +                function_content = function_format.format(tools=json.dumps(tools, ensure_ascii=False, sort_keys=True))
| 43 | +
| 44 | +            history = []
| 45 | +            for i in chatrounds:
| 46 | +                if i["role"] == "system":
| 47 | +                    continue
| 48 | +
| 49 | +                if i["role"] == "user":
| 50 | +                    history.append(("user", i["content"]))
| 51 | +
| 52 | +                if i["role"] == "assistant":
| 53 | +                    if "function_call" in i:
| 54 | +                        if not isinstance(i["function_call"], dict):
| 55 | +                            funccall_error_cnt += 1
| 56 | +                            continue
| 57 | +                        content = "#function" + json.dumps({**{"content": i["content"]}, **i["function_call"]}, ensure_ascii=False)
| 58 | +                    else:
| 59 | +                        content = i["content"]
| 60 | +                    history.append(("assistant", content))
| 61 | +
| 62 | +
| 63 | +                if i["role"] == "function":
| 64 | +                    content = json.dumps({**{"content": i["content"]}, **{"name": i["name"]}}, ensure_ascii=False)
| 65 | +                    history.append(("user", content))
| 66 | +
| 67 | +
| 68 | +            history = [i[1] for i in history]
| 69 | +            history[0] = "\n".join([system_content, function_content, history[0]])
| 70 | +
| 71 | +            for his_idx in range(0, len(history), 2):
| 72 | +                output = history[his_idx+1]
| 73 | +
| 74 | +                if "#function" in output:
| 75 | +                    output = output.split("#function")[-1]
| 76 | +
| 77 | +                try:
| 78 | +                    output = json.loads(output)
| 79 | +                except:
| 80 | +                    output = {"content": output}
| 81 | +
| 82 | +
| 83 | +                func_call_train_datas.append(
| 84 | +                    {
| 85 | +                        "instruction": history[his_idx],
| 86 | +                        "input": "",
| 87 | +                        "output": output,
| 88 | +                        "history": [history[:his_idx+2][i:i+2] for i in range(0, len(history[:his_idx]), 2)],
| 89 | +                        "functions": tools
| 90 | +                    },
| 91 | +                )
| 92 | +        return func_call_train_datas
| 93 | +``` |
| 94 | + |
| 95 | +#### 2. 编写 Model 的相关函数
| 96 | +如果模型的加载和推理需要定制(e.g. 输入需要转换为特定的格式,如 chatml 格式或者其他的 human-bot 格式),则需要去`~/models/base_model` 中继承`ToolModel` 类来覆写对应的`load_model` 和`generate` 函数,一个示例如下:
| 97 | +```python |
| 98 | +class ToolModel:
| 99 | +    def __init__(self, model_path: str, template: str, trust_remote_code=True, tensor_parallel_size=1, gpu_memory_utilization=0.25):
| 100 | +        self.model_path = model_path
| 101 | +        self.trust_remote_code = trust_remote_code
| 102 | +        self.tensor_parallel_size = tensor_parallel_size
| 103 | +        self.gpu_memory_utilization = gpu_memory_utilization
| 104 | +        self.load_model(self.model_path, self.trust_remote_code, self.tensor_parallel_size, self.gpu_memory_utilization)
| 105 | +
| 106 | +    def generate(self, prompts: str, template: str = None, generate_configs: GenerateConfigs = None) -> list:
| 107 | +        '''产出对应结果'''
| 108 | +        pass
| 109 | +
| 110 | +    def generate_params(
| 111 | +        self, generate_configs: GenerateConfigs,
| 112 | +    ):
| 113 | +        '''generate param'''
| 114 | +        kargs = generate_configs.dict()
| 115 | +        return kargs
| 116 | +
| 117 | +    def load_model(self, model_path, trust_remote_code=True, tensor_parallel_size=1, gpu_memory_utilization=0.25):
| 118 | +        '''加载模型'''
| 119 | +        self.tokenizer = AutoTokenizer.from_pretrained(self.model_path, trust_remote_code=trust_remote_code)
| 120 | +        self.model = AutoModelForCausalLM.from_pretrained(self.model_path, device_map="auto", trust_remote_code=trust_remote_code).eval()
| 121 | +
| 122 | +        # self.model = LLM(model=model_path, trust_remote_code=trust_remote_code, tensor_parallel_size=tensor_parallel_size, gpu_memory_utilization=gpu_memory_utilization)
| 123 | +``` |
| 124 | + |
| 125 | +#### 3. 注册模型和 eval 函数即可
| 126 | +在~/models/__init__.py 中注册即可 |
| 127 | +```python |
| 128 | +from .base_model import ToolModel
| 129 | +
| 130 | +__all__ = [
| 131 | +    "ToolModel",
| 132 | +]
| 133 | +``` |
| 134 | +在~/evals/__init__.py 中注册即可
| 135 | +```python |
| 136 | +from .base_evalution import ToolEvalution
| 137 | +from .toolfill_evalution import ToolFillEvalution
| 138 | +from .toolparser_evalution import ToolParserEvalution
| 139 | +from .toolsummary_evalution import ToolSummaryEvalution
| 140 | +from .func_call_evalution import FuncCallEvalution
| 141 | +
| 142 | +
| 143 | +__all__ = [
| 144 | +    "ToolEvalution", "ToolFillEvalution", "ToolParserEvalution", "ToolSummaryEvalution", "FuncCallEvalution"
| 145 | +]
| 146 | +``` |
| 147 | + |
| 148 | + |
| 149 | +#### 4. 执行测试脚本
| 150 | +修改~/src/qwen_eval_main.py 中的 datainfos 和 model_infos
| 151 | +```python |
| 152 | +model_infos = [
| 153 | +    {"model_name": "", "template": "chatml", "model_path": "",
| 154 | +     "peft_path": "", "model_class": QwenModel}]
| 155 | +
| 156 | +datainfos = [
| 157 | +    {"dataset_path": "~/fcdata_luban_zh_test.jsonl", "dataset_name": "fcdata_luban_zh", "tool_task": "func_call"},
| 158 | +    {"dataset_path": "~/test_datas/fcdata_zh_test_v1.jsonl", "dataset_name": "fcdata_zh", "tool_task": "func_call"},
| 159 | +]
| 160 | +``` |
| 161 | + |
| 162 | +运行下述命令即可 |
| 163 | +```Bash |
| 164 | +python qwen_eval_main.py |
| 165 | +``` |
| 166 | + |
| 167 | +<br> |
| 168 | + |
| 169 | +### 非chatml接入
| 170 | +如果需要在自己的 huggingface 格式的模型上进行测试的话,总的步骤分为如下几步: |
| 171 | +1. 编写~/getAssistantAns.py 相关代码 |
| 172 | +2. 执行测试脚本 |
| 173 | + |
| 174 | + |
| 175 | +#### 1、编写 getAssistantAns 示例
| 176 | +``` |
| 177 | +class GetAssistantAns(): |
| 178 | + # 按照自己推理需求自己修改代码 |
| 179 | +
|
| 180 | + def __init__(self, gpu_num=1): |
| 181 | + model = AutoModelForCausalLM.from_pretrained(model_name) |
| 182 | + device_list = [] |
| 183 | + for gpu_idx in range(gpu_num): |
| 184 | + device_list.append(torch.device(f"cuda:{gpu_idx}"))
| 185 | +
|
| 186 | + # 将模型移动到指定的GPU设备 |
| 187 | + model.to(device_list[0])
| 188 | +
|
| 189 | +
|
| 190 | + def gen_answer(self, chat_dict, gpu_index): |
| 191 | + # 这里实际根据自己推理逻辑 然后转为标准格式返回 |
| 192 | + # 以下仅仅是样例 |
| 193 | + import time |
| 194 | + print(os.environ["CUDA_VISIBLE_DEVICES"]) |
| 195 | + time.sleep(1) |
| 196 | + rtn_dict1 = { |
| 197 | + "role": "assistant", |
| 198 | + "content": None, |
| 199 | + "function_call": |
| 200 | + { |
| 201 | + "name": "get_fudan_university_scoreline", |
| 202 | + "arguments": "{\n \"year\": \"2020\"\n}" |
| 203 | + } |
| 204 | + } |
| 205 | +
|
| 206 | + rtn_dict2 = { |
| 207 | + "role": "assistant", |
| 208 | + "content": "2020年复旦大学的分数线如下:\n\n- 文科一批:630分\n- 文科二批:610分\n- 理科一批:650分\n- 理科二批:630分" |
| 209 | + } |
| 210 | +
|
| 211 | + return random.choice([rtn_dict1, rtn_dict2]) |
| 212 | +``` |
| 213 | +#### 2、执行测试脚本
| 214 | +修改~/src/opensource_functioncall_evalution.py # test_ans_file_list |
| 215 | +```python |
| 216 | +test_ans_file_list = [
| 217 | +    "fcdata_zh_test.jsonl"
| 218 | +]
| 219 | +``` |
| 220 | + |
| 221 | +运行下述命令即可 |
| 222 | +```Bash |
| 223 | +python opensource_functioncall_evalution.py |
| 224 | +``` |