Movatterモバイル変換


[0]ホーム

URL:


Skip to content
Search Gists
Sign in Sign up

Instantly share code, notes, and snippets.

@maximal
CreatedNovember 3, 2020 16:43
    • Star(0)You must be signed in to star a gist
    • Fork(0)You must be signed in to fork a gist
    Save maximal/c9b29b4367c9b60beca5687702329a17 to your computer and use it in GitHub Desktop.
    Excel writing example
    <?php
    /**
    * Генератор текстовых диффов из XLSX-файлов
    * с использованием редакционного расстояния Левенштейна
    *
    * @author MaximAL
    * @since 2020-08-01
    * @date 2020-08-01
    * @time 4:51
    *
    * @link https://maximals.ru
    * @link https://sijeko.ru
    */
    namespaceMaximal\TextDiff;
    useBox\Spout\Common\Entity\Row;
    useBox\Spout\Reader\Common\Creator\ReaderEntityFactory;
    useBox\Spout\Reader\XLSX\Sheet;
    useBox\Spout\Writer\Common\Creator\Style\StyleBuilder;
    useBox\Spout\Writer\Common\Creator\WriterEntityFactory;
    /**
    * Class App
    * @package Maximal\TextDiff
    */
    class App
    {
    constDEFAULT_NUMBER =2;
    private$config;
    private$argv;
    publicfunction__construct(array$argv,array$config = [])
    {
    $this->argv =$argv;
    $this->config =$config;
    }
    publicfunctionrun()
    {
    echo'XLSX text diff top generator (c) MaximAL 2020',PHP_EOL;
    $argc =count($this->argv);
    if ($argc <3 ||$argc >4) {
    $this->help();
    return;
    }
    $timeStart =microtime(true);
    // Валидация количества
    $numberParam =$argc ===4 ?$this->argv[3] :'';
    if ($argc ===4 && !preg_match('/^[1-9]\d*$/',$numberParam)) {
    echo'Third parameter (got:',$numberParam,') should be a positive integer.',PHP_EOL;
    return;
    }
    $number =$argc ===4 ?intval($numberParam) :self::DEFAULT_NUMBER;
    // Валидация входного файла
    $inputFile =$this->argv[1];
    if (!preg_match('/\.xlsx$/ui',$inputFile)) {
    echo'File must be in XLSX format only:',$inputFile,PHP_EOL;
    return;
    }
    if (!is_file($inputFile)) {
    echo'Input file cannot be read or does not exist:',$inputFile,PHP_EOL;
    return;
    }
    // Валидация выходного файла
    $outputFile =$this->argv[2];
    if (is_file($outputFile)) {
    echo'Output file already exists:',$outputFile,PHP_EOL;
    return;
    }
    if (@file_put_contents($outputFile,'test') ===false) {
    echo'Could not open file for writing (read-only?):',$outputFile,PHP_EOL;
    return;
    }
    unlink($outputFile);
    // Поехали!
    $reader = ReaderEntityFactory::createXLSXReader();
    $reader->open($inputFile);
    $sheetNames = [];
    foreach ($reader->getSheetIterator()as$sheet) {
    /** @var Sheet $sheet */
    $sheetNames []=$sheet->getName();
    }
    if (count($sheetNames) <2) {
    echo'XLSX file must contain at least 2 sheets:',$inputFile,PHP_EOL;
    return;
    }
    $array1 = [];
    $array2 = [];
    foreach ($reader->getSheetIterator()as$index =>$sheet) {
    /** @var Sheet $sheet */
    foreach ($sheet->getRowIterator()as$row) {
    /** @var Row $row */
    if ($index ===1) {
    $array1 []=$row->toArray();
    }else {
    $array2 []=$row->toArray();
    }
    }
    }
    $reader->close();
    array_shift($array1);
    array_shift($array2);
    $count1 =count($array1);
    $count2 =count($array2);
    echo'Average calculations:',$count1,' ×',$count2,'';
    echoself::averageNumber($count1 *$count2),PHP_EOL;
    echo'Average output rows:',$count1,' ×',$number,'';
    echoself::averageNumber($count1 *$number),PHP_EOL;
    $defaultStyle = (newStyleBuilder())->setShouldWrapText()->build();
    $writer = WriterEntityFactory::createXLSXWriter();
    $writer->openToFile($outputFile)->setDefaultRowStyle($defaultStyle);
    $writer->addRow(WriterEntityFactory::createRowFromArray([
    'Массив 1',
    null,
    'Массив 2',
    null,
    'Близость',
    ]));
    $writer->addRow(WriterEntityFactory::createRowFromArray([
    'ИД',
    'Значение',
    'ИД',
    'Значение',
    'Расстояние Дамерау—Левенштейна',
    ]));
    $index =0;
    $rows =0;
    foreach ($array1as$row1) {
    $index++;
    $percent =100 *$index /$count1;
    echo"\r",$index ,'/',$count1,sprintf(' %.1f%%',$percent);
    $id1 =isset($row1[0]) ?intval($row1[0]) :0;
    if ($id1 ===0) {
    continue;
    }
    $text1 =isset($row1[1]) ?trim($row1[1]) :'';
    if ($text1 ==='') {
    continue;
    }
    $distance = [];
    foreach ($array2as$row2) {
    $id2 =isset($row2[0]) ?intval($row2[0]) :0;
    if ($id2 ===0) {
    continue;
    }
    $text2 =isset($row2[1]) ?trim($row2[1]) :'';
    if ($text2 ==='') {
    continue;
    }
    $distance []= [
    'id' =>$id2,
    'text' =>$text2,
    'dist' =>levenshtein($text1,$text2),
    ];
    }
    usort($distance, [self::class,'compareDistances']);
    $top =array_slice($distance,0,$number);
    foreach ($topas$topIndex =>$item) {
    $writer->addRow(WriterEntityFactory::createRowFromArray([
    $id1,
    $text1,
    $item['id'],
    $item['text'],
    $item['dist'],
    ]));
    $rows++;
    }
    unset($distance);
    unset($top);
    }
    echoPHP_EOL,'Writing file. Please wait...';
    $writer->close();
    $timeDiff =microtime(true) -$timeStart;
    echo"\r",'Done with',$rows,' rows in',round($timeDiff),' seconds.',PHP_EOL;
    }
    privatefunctionhelp()
    {
    echo'Usage: php',$this->argv[0],' {input XLSX file}';
    echo'{output XLSX file} [n, default',self::DEFAULT_NUMBER,']',PHP_EOL;
    }
    privatestaticfunctioncompareDistances($a,$b):int
    {
    if ($a['dist'] ===$b['dist']) {
    return0;
    }
    return$a['dist'] <$b['dist'] ? -1 : +1;
    }
    privatestaticfunctionaverageNumber($number):string
    {
    if ($number >999999999) {
    returnround($number /1000000000,1) .'G';
    }
    if ($number >999999) {
    returnround($number /1000000,1) .'M';
    }
    if ($number >999) {
    returnround($number /1000,1) .'k';
    }
    return$number;
    }
    }
    Sign up for freeto join this conversation on GitHub. Already have an account?Sign in to comment

    [8]ページ先頭

    ©2009-2025 Movatter.jp