Instantly share code, notes, and snippets.
Save maximal/c9b29b4367c9b60beca5687702329a17 to your computer and use it in GitHub Desktop.
Excel writing example
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.Learn more about bidirectional Unicode characters
<?php | |
/** | |
* Генератор текстовых диффов из XLSX-файлов | |
* с использованием редакционного расстояния Левенштейна | |
* | |
* @author MaximAL | |
* @since 2020-08-01 | |
* @date 2020-08-01 | |
* @time 4:51 | |
* | |
* @link https://maximals.ru | |
* @link https://sijeko.ru | |
*/ | |
namespaceMaximal\TextDiff; | |
useBox\Spout\Common\Entity\Row; | |
useBox\Spout\Reader\Common\Creator\ReaderEntityFactory; | |
useBox\Spout\Reader\XLSX\Sheet; | |
useBox\Spout\Writer\Common\Creator\Style\StyleBuilder; | |
useBox\Spout\Writer\Common\Creator\WriterEntityFactory; | |
/** | |
* Class App | |
* @package Maximal\TextDiff | |
*/ | |
class App | |
{ | |
constDEFAULT_NUMBER =2; | |
private$config; | |
private$argv; | |
publicfunction__construct(array$argv,array$config = []) | |
{ | |
$this->argv =$argv; | |
$this->config =$config; | |
} | |
publicfunctionrun() | |
{ | |
echo'XLSX text diff top generator (c) MaximAL 2020',PHP_EOL; | |
$argc =count($this->argv); | |
if ($argc <3 ||$argc >4) { | |
$this->help(); | |
return; | |
} | |
$timeStart =microtime(true); | |
// Валидация количества | |
$numberParam =$argc ===4 ?$this->argv[3] :''; | |
if ($argc ===4 && !preg_match('/^[1-9]\d*$/',$numberParam)) { | |
echo'Third parameter (got:',$numberParam,') should be a positive integer.',PHP_EOL; | |
return; | |
} | |
$number =$argc ===4 ?intval($numberParam) :self::DEFAULT_NUMBER; | |
// Валидация входного файла | |
$inputFile =$this->argv[1]; | |
if (!preg_match('/\.xlsx$/ui',$inputFile)) { | |
echo'File must be in XLSX format only:',$inputFile,PHP_EOL; | |
return; | |
} | |
if (!is_file($inputFile)) { | |
echo'Input file cannot be read or does not exist:',$inputFile,PHP_EOL; | |
return; | |
} | |
// Валидация выходного файла | |
$outputFile =$this->argv[2]; | |
if (is_file($outputFile)) { | |
echo'Output file already exists:',$outputFile,PHP_EOL; | |
return; | |
} | |
if (@file_put_contents($outputFile,'test') ===false) { | |
echo'Could not open file for writing (read-only?):',$outputFile,PHP_EOL; | |
return; | |
} | |
unlink($outputFile); | |
// Поехали! | |
$reader = ReaderEntityFactory::createXLSXReader(); | |
$reader->open($inputFile); | |
$sheetNames = []; | |
foreach ($reader->getSheetIterator()as$sheet) { | |
/** @var Sheet $sheet */ | |
$sheetNames []=$sheet->getName(); | |
} | |
if (count($sheetNames) <2) { | |
echo'XLSX file must contain at least 2 sheets:',$inputFile,PHP_EOL; | |
return; | |
} | |
$array1 = []; | |
$array2 = []; | |
foreach ($reader->getSheetIterator()as$index =>$sheet) { | |
/** @var Sheet $sheet */ | |
foreach ($sheet->getRowIterator()as$row) { | |
/** @var Row $row */ | |
if ($index ===1) { | |
$array1 []=$row->toArray(); | |
}else { | |
$array2 []=$row->toArray(); | |
} | |
} | |
} | |
$reader->close(); | |
array_shift($array1); | |
array_shift($array2); | |
$count1 =count($array1); | |
$count2 =count($array2); | |
echo'Average calculations:',$count1,' ×',$count2,' ≈'; | |
echoself::averageNumber($count1 *$count2),PHP_EOL; | |
echo'Average output rows:',$count1,' ×',$number,' ≈'; | |
echoself::averageNumber($count1 *$number),PHP_EOL; | |
$defaultStyle = (newStyleBuilder())->setShouldWrapText()->build(); | |
$writer = WriterEntityFactory::createXLSXWriter(); | |
$writer->openToFile($outputFile)->setDefaultRowStyle($defaultStyle); | |
$writer->addRow(WriterEntityFactory::createRowFromArray([ | |
'Массив 1', | |
null, | |
'Массив 2', | |
null, | |
'Близость', | |
])); | |
$writer->addRow(WriterEntityFactory::createRowFromArray([ | |
'ИД', | |
'Значение', | |
'ИД', | |
'Значение', | |
'Расстояние Дамерау—Левенштейна', | |
])); | |
$index =0; | |
$rows =0; | |
foreach ($array1as$row1) { | |
$index++; | |
$percent =100 *$index /$count1; | |
echo"\r",$index ,'/',$count1,sprintf(' %.1f%%',$percent); | |
$id1 =isset($row1[0]) ?intval($row1[0]) :0; | |
if ($id1 ===0) { | |
continue; | |
} | |
$text1 =isset($row1[1]) ?trim($row1[1]) :''; | |
if ($text1 ==='') { | |
continue; | |
} | |
$distance = []; | |
foreach ($array2as$row2) { | |
$id2 =isset($row2[0]) ?intval($row2[0]) :0; | |
if ($id2 ===0) { | |
continue; | |
} | |
$text2 =isset($row2[1]) ?trim($row2[1]) :''; | |
if ($text2 ==='') { | |
continue; | |
} | |
$distance []= [ | |
'id' =>$id2, | |
'text' =>$text2, | |
'dist' =>levenshtein($text1,$text2), | |
]; | |
} | |
usort($distance, [self::class,'compareDistances']); | |
$top =array_slice($distance,0,$number); | |
foreach ($topas$topIndex =>$item) { | |
$writer->addRow(WriterEntityFactory::createRowFromArray([ | |
$id1, | |
$text1, | |
$item['id'], | |
$item['text'], | |
$item['dist'], | |
])); | |
$rows++; | |
} | |
unset($distance); | |
unset($top); | |
} | |
echoPHP_EOL,'Writing file. Please wait...'; | |
$writer->close(); | |
$timeDiff =microtime(true) -$timeStart; | |
echo"\r",'Done with',$rows,' rows in',round($timeDiff),' seconds.',PHP_EOL; | |
} | |
privatefunctionhelp() | |
{ | |
echo'Usage: php',$this->argv[0],' {input XLSX file}'; | |
echo'{output XLSX file} [n, default',self::DEFAULT_NUMBER,']',PHP_EOL; | |
} | |
privatestaticfunctioncompareDistances($a,$b):int | |
{ | |
if ($a['dist'] ===$b['dist']) { | |
return0; | |
} | |
return$a['dist'] <$b['dist'] ? -1 : +1; | |
} | |
privatestaticfunctionaverageNumber($number):string | |
{ | |
if ($number >999999999) { | |
returnround($number /1000000000,1) .'G'; | |
} | |
if ($number >999999) { | |
returnround($number /1000000,1) .'M'; | |
} | |
if ($number >999) { | |
returnround($number /1000,1) .'k'; | |
} | |
return$number; | |
} | |
} |
Sign up for freeto join this conversation on GitHub. Already have an account?Sign in to comment