|
8 | 8 | from datetime import datetime |
9 | 9 |
|
10 | 10 | import datasets |
11 | | -from InstructorEmbedding import INSTRUCTOR |
12 | 11 | import numpy |
13 | 12 | import orjson |
14 | 13 | from rouge import Rouge |
@@ -502,23 +501,17 @@ def transform(task, args, inputs, stream=False): |
502 | 501 |
|
503 | 502 |
|
504 | 503 | def create_embedding(transformer): |
505 | | -    instructor = transformer.startswith("hkunlp/instructor") |
506 | | -    klass = INSTRUCTOR if instructor else SentenceTransformer |
507 | | -    return klass(transformer) |
| 504 | +    return SentenceTransformer(transformer) |
508 | 505 |
|
509 | 506 |
|
510 | 507 | def embed_using(model, transformer, inputs, kwargs): |
511 | 508 |     if isinstance(kwargs, str): |
512 | 509 |         kwargs = orjson.loads(kwargs) |
513 | 510 |
|
514 | 511 |     instructor = transformer.startswith("hkunlp/instructor") |
515 | | -    if instructor: |
516 | | -        texts_with_instructions = [] |
| 512 | +    if instructor and "instruction" in kwargs: |
517 | 513 |         instruction = kwargs.pop("instruction") |
518 | | -        for text in inputs: |
519 | | -            texts_with_instructions.append([instruction, text]) |
520 | | - |
521 | | -        inputs = texts_with_instructions |
| 514 | +        kwargs["prompt"] = instruction |
522 | 515 |
|
523 | 516 |     return model.encode(inputs, **kwargs) |
524 | 517 |
|
@@ -1029,7 +1022,6 @@ def __init__( |
1029 | 1022 |         path: str, |
1030 | 1023 |         hyperparameters: dict, |
1031 | 1024 |     ) -> None: |
1032 | | - |
1033 | 1025 |         # initialize class variables |
1034 | 1026 |         self.project_id = project_id |
1035 | 1027 |         self.model_id = model_id |
@@ -1100,8 +1092,9 @@ def print_number_of_trainable_model_parameters(self, model): |
1100 | 1092 |         # Calculate and print the number and percentage of trainable parameters |
1101 | 1093 |         r_log("info", f"Trainable model parameters:{trainable_model_params}") |
1102 | 1094 |         r_log("info", f"All model parameters:{all_model_params}") |
1103 | | -        r_log("info", |
1104 | | -            f"Percentage of trainable model parameters:{100*trainable_model_params/all_model_params:.2f}%" |
| 1095 | +        r_log( |
| 1096 | +            "info", |
| 1097 | +            f"Percentage of trainable model parameters:{100*trainable_model_params/all_model_params:.2f}%", |
1105 | 1098 |         ) |
1106 | 1099 |
|
1107 | 1100 |     def tokenize_function(self): |
@@ -1396,23 +1389,22 @@ def __init__( |
1396 | 1389 |             "bias": "none", |
1397 | 1390 |             "task_type": "CAUSAL_LM", |
1398 | 1391 |         } |
1399 | | -        r_log("info", |
| 1392 | +        r_log( |
| 1393 | +            "info", |
1400 | 1394 |             "LoRA configuration are not set. Using default parameters" |
1401 | | -            + json.dumps(self.lora_config_params) |
| 1395 | +            + json.dumps(self.lora_config_params), |
1402 | 1396 |         ) |
1403 | 1397 |
|
1404 | 1398 |         self.prompt_template = None |
1405 | 1399 |         if "prompt_template" in hyperparameters.keys(): |
1406 | 1400 |             self.prompt_template = hyperparameters.pop("prompt_template") |
1407 | 1401 |
|
1408 | 1402 |     def train(self): |
1409 | | - |
1410 | 1403 |         args = TrainingArguments( |
1411 | 1404 |             output_dir=self.path, logging_dir=self.path, **self.training_args |
1412 | 1405 |         ) |
1413 | 1406 |
|
1414 | 1407 |         def formatting_prompts_func(example): |
1415 | | - |
1416 | 1408 |             system_content = example["system"] |
1417 | 1409 |             user_content = example["user"] |
1418 | 1410 |             assistant_content = example["assistant"] |
@@ -1463,7 +1455,7 @@ def formatting_prompts_func(example): |
1463 | 1455 |             peft_config=LoraConfig(**self.lora_config_params), |
1464 | 1456 |             callbacks=[PGMLCallback(self.project_id, self.model_id)], |
1465 | 1457 |         ) |
1466 | | -        r_log("info","Creating Supervised Fine Tuning trainer done. Training ... ") |
| 1458 | +        r_log("info", "Creating Supervised Fine Tuning trainer done. Training ... ") |
1467 | 1459 |
|
1468 | 1460 |         # Train |
1469 | 1461 |         self.trainer.train() |
@@ -1582,7 +1574,6 @@ def finetune_conversation( |
1582 | 1574 |     project_id, |
1583 | 1575 |     model_id, |
1584 | 1576 | ): |
1585 | | - |
1586 | 1577 |     train_dataset = datasets.Dataset.from_dict( |
1587 | 1578 |         { |
1588 | 1579 |             "system": system_train, |
|