Skip to content

Commit

Permalink
Prepare Csv Reader for Php8.4
Browse files Browse the repository at this point in the history
Also security patches.
  • Loading branch information
oleibman committed Sep 8, 2024
1 parent 822d910 commit 949ff63
Show file tree
Hide file tree
Showing 7 changed files with 86 additions and 28 deletions.
24 changes: 10 additions & 14 deletions .php-cs-fixer.dist.php
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,17 @@
'backtick_to_shell_exec' => true,
'binary_operator_spaces' => true,
'blank_line_after_namespace' => true,
'blank_lines_before_namespace' => ['max_line_breaks' => 2, 'min_line_breaks' => 2], // we want 1 blank line before namespace
'blank_line_after_opening_tag' => true,
'blank_line_before_statement' => true,
'braces' => true,
'cast_spaces' => true,
'class_attributes_separation' => ['elements' => ['method' => 'one', 'property' => 'one']], // const are often grouped with other related const
'class_definition' => false, // phpcs disagree
'class_keyword_remove' => false, // Deprecated, and ::class keyword gives us better support in IDE
'combine_consecutive_issets' => true,
'combine_consecutive_unsets' => true,
'combine_nested_dirname' => true,
'comment_to_phpdoc' => false, // interferes with annotations
'compact_nullable_typehint' => true,
'compact_nullable_type_declaration' => true,
'concat_space' => ['spacing' => 'one'],
'constant_case' => true,
'date_time_immutable' => false, // Break our unit tests
Expand All @@ -46,7 +45,6 @@
'encoding' => true,
'ereg_to_preg' => true,
'error_suppression' => false, // it breaks \PhpOffice\PhpSpreadsheet\Helper\Handler
'escape_implicit_backslashes' => true,
'explicit_indirect_variable' => false, // I feel it makes the code actually harder to read
'explicit_string_variable' => false, // I feel it makes the code actually harder to read
'final_class' => false, // We need non-final classes
Expand All @@ -58,7 +56,6 @@
'fully_qualified_strict_types' => true,
'function_declaration' => true,
'function_to_constant' => true,
'function_typehint_space' => true,
'general_phpdoc_annotation_remove' => ['annotations' => ['access', 'category', 'copyright']],
'general_phpdoc_tag_rename' => true,
'global_namespace_import' => true,
Expand Down Expand Up @@ -92,15 +89,14 @@
'native_constant_invocation' => false, // Micro optimization that look messy
'native_function_casing' => true,
'native_function_invocation' => false, // I suppose this would be best, but I am still unconvinced about the visual aspect of it
'native_function_type_declaration_casing' => true,
'new_with_braces' => true,
'native_type_declaration_casing' => true,
'new_with_parentheses' => ['anonymous_class' => true, 'named_class' => true],
'no_alias_functions' => true,
'no_alias_language_construct_call' => true,
'no_alternative_syntax' => true,
'no_binary_string' => true,
'no_blank_lines_after_class_opening' => true,
'no_blank_lines_after_phpdoc' => true,
'no_blank_lines_before_namespace' => false, // we want 1 blank line before namespace
'no_break_comment' => true,
'no_closing_tag' => true,
'no_empty_comment' => true,
Expand All @@ -120,16 +116,14 @@
'no_space_around_double_colon' => true,
'no_spaces_after_function_name' => true,
'no_spaces_around_offset' => true,
'no_spaces_inside_parenthesis' => true,
'no_superfluous_elseif' => false, // Might be risky on a huge code base
'no_superfluous_phpdoc_tags' => ['allow_mixed' => true],
'no_trailing_comma_in_list_call' => true,
'no_trailing_comma_in_singleline_array' => true,
'no_trailing_comma_in_singleline' => ['elements' => ['arguments', 'array_destructuring', 'array', 'group_import']],
'no_trailing_whitespace' => true,
'no_trailing_whitespace_in_comment' => true,
'no_trailing_whitespace_in_string' => false, // Too dangerous
'no_unneeded_control_parentheses' => true,
'no_unneeded_curly_braces' => true,
'no_unneeded_braces' => true,
'no_unneeded_final_method' => true,
'no_unreachable_default_argument_value' => true,
'no_unset_cast' => true,
Expand Down Expand Up @@ -214,21 +208,22 @@
'simplified_if_return' => false, // Even if technically correct we prefer to be explicit
'simplified_null_return' => false, // Even if technically correct we prefer to be explicit
'single_blank_line_at_eof' => true,
'single_blank_line_before_namespace' => true,
'single_class_element_per_statement' => true,
'single_import_per_statement' => true,
'single_line_after_imports' => true,
'single_line_comment_style' => true,
'single_line_throw' => false, // I don't see any reason for having a special case for Exception
'single_quote' => true,
'single_space_after_construct' => true,
'single_space_around_construct' => true,
'single_trait_insert_per_statement' => true,
'space_after_semicolon' => true,
'spaces_inside_parentheses' => ['space' => 'none'],
'standardize_increment' => true,
'standardize_not_equals' => true,
'static_lambda' => false, // Risky if we can't guarantee nobody use `bindTo()`
'strict_comparison' => false, // No, too dangerous to change that
'strict_param' => false, // No, too dangerous to change that
'string_implicit_backslashes' => false, // was escape_implicit_backslashes, too confusing
'string_length_to_empty' => true,
'string_line_ending' => true,
'switch_case_semicolon_to_colon' => true,
Expand All @@ -239,6 +234,7 @@
'ternary_to_null_coalescing' => true,
'trailing_comma_in_multiline' => true,
'trim_array_spaces' => true,
'type_declaration_spaces' => ['elements' => ['function', 'property']], // was function_typehint_space
'types_spaces' => true,
'unary_operator_spaces' => true,
'use_arrow_functions' => true,
Expand Down
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@ and this project adheres to [Semantic Versioning](https://semver.org).
### Fixed

- Backported security patches.
- Change to Csv Reader (see below under Deprecated). Backport of PR #4162 intended for 3.0.0. [Issue #4161](https://github.com/PHPOffice/PhpSpreadsheet/issues/4161)

### Deprecated

- Php8.4 will deprecate the escape parameter of fgetcsv. Csv Reader is affected by this; code is changed to be unaffected, but this will mean a breaking change is coming with Php9. Any code which uses the default escape value of backslash will fail in Php9. It is recommended to explicitly set the escape value to null string before then.

## 2024-05-11 - 2.1.0

Expand Down
54 changes: 47 additions & 7 deletions src/PhpSpreadsheet/Reader/Csv.php
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,19 @@ class Csv extends BaseReader

/**
* The character that can escape the enclosure.
* This will probably become unsupported in Php 9.
* Not yet ready to mark deprecated in order to give users
* a migration path.
*/
private string $escapeCharacter = '\\';
private ?string $escapeCharacter = null;

/**
* The character that will be supplied to fgetcsv
* when escapeCharacter is null.
* It is anticipated that it will conditionally be set
* to null-string for Php9 and above.
*/
private static string $defaultEscapeCharacter = '\\';

/**
* Callback for setting defaults in construction.
Expand Down Expand Up @@ -185,7 +196,7 @@ protected function inferSeparator(): void
return;
}

$inferenceEngine = new Delimiter($this->fileHandle, $this->escapeCharacter, $this->enclosure);
$inferenceEngine = new Delimiter($this->fileHandle, $this->escapeCharacter ?? self::$defaultEscapeCharacter, $this->enclosure);

// If number of lines is 0, nothing to infer : fall back to the default
if ($inferenceEngine->linesCounted() === 0) {
Expand Down Expand Up @@ -228,11 +239,11 @@ public function listWorksheetInfo(string $filename): array
$delimiter = $this->delimiter ?? '';

// Loop through each line of the file in turn
$rowData = fgetcsv($fileHandle, 0, $delimiter, $this->enclosure, $this->escapeCharacter);
$rowData = self::getCsv($fileHandle, 0, $delimiter, $this->enclosure, $this->escapeCharacter);
while (is_array($rowData)) {
++$worksheetInfo[0]['totalRows'];
$worksheetInfo[0]['lastColumnIndex'] = max($worksheetInfo[0]['lastColumnIndex'], count($rowData) - 1);
$rowData = fgetcsv($fileHandle, 0, $delimiter, $this->enclosure, $this->escapeCharacter);
$rowData = self::getCsv($fileHandle, 0, $delimiter, $this->enclosure, $this->escapeCharacter);
}

$worksheetInfo[0]['lastColumnLetter'] = Coordinate::stringFromColumnIndex($worksheetInfo[0]['lastColumnIndex'] + 1);
Expand Down Expand Up @@ -379,7 +390,7 @@ private function loadStringOrFile(string $filename, Spreadsheet $spreadsheet, bo

// Loop through each line of the file in turn
$delimiter = $this->delimiter ?? '';
$rowData = fgetcsv($fileHandle, 0, $delimiter, $this->enclosure, $this->escapeCharacter);
$rowData = self::getCsv($fileHandle, 0, $delimiter, $this->enclosure, $this->escapeCharacter);
$valueBinder = Cell::getValueBinder();
$preserveBooleanString = method_exists($valueBinder, 'getBooleanConversion') && $valueBinder->getBooleanConversion();
$this->getTrue = Calculation::getTRUE();
Expand Down Expand Up @@ -416,7 +427,7 @@ private function loadStringOrFile(string $filename, Spreadsheet $spreadsheet, bo
}
++$columnLetter;
}
$rowData = fgetcsv($fileHandle, 0, $delimiter, $this->enclosure, $this->escapeCharacter);
$rowData = self::getCsv($fileHandle, 0, $delimiter, $this->enclosure, $this->escapeCharacter);
++$currentRow;
}

Expand Down Expand Up @@ -527,6 +538,11 @@ public function getContiguous(): bool
return $this->contiguous;
}

/**
* Php9 intends to drop support for this parameter in fgetcsv.
* Not yet ready to mark deprecated in order to give users
* a migration path.
*/
public function setEscapeCharacter(string $escapeCharacter): self
{
$this->escapeCharacter = $escapeCharacter;
Expand All @@ -536,7 +552,7 @@ public function setEscapeCharacter(string $escapeCharacter): self

public function getEscapeCharacter(): string
{
return $this->escapeCharacter;
return $this->escapeCharacter ?? self::$defaultEscapeCharacter;
}

/**
Expand Down Expand Up @@ -648,4 +664,28 @@ public function setSheetNameIsFileName(bool $sheetNameIsFileName): self

return $this;
}

/**
* Php8.4 deprecates use of anything other than null string
* as escape Character.
*
* @param resource $stream
* @param null|int<0, max> $length
*
* @return array<int,?string>|false
*/
private static function getCsv(
$stream,
?int $length = null,
string $separator = ',',
string $enclosure = '"',
?string $escape = null
): array|false {
$escape = $escape ?? self::$defaultEscapeCharacter;
if (PHP_VERSION_ID >= 80400 && $escape !== '') {
return @fgetcsv($stream, $length, $separator, $enclosure, $escape);
}

return fgetcsv($stream, $length, $separator, $enclosure, $escape);
}
}
24 changes: 18 additions & 6 deletions src/PhpSpreadsheet/Reader/Security/XmlScanner.php
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,11 @@ private static function forceString(mixed $arg): string

private function toUtf8(string $xml): string
{
$pattern = '/encoding="(.*?)"/';
$result = preg_match($pattern, $xml, $matches);
$charset = strtoupper($result ? $matches[1] : 'UTF-8');

$charset = $this->findCharSet($xml);
if ($charset !== 'UTF-8') {
$xml = self::forceString(mb_convert_encoding($xml, 'UTF-8', $charset));

$result = preg_match($pattern, $xml, $matches);
$charset = strtoupper($result ? $matches[1] : 'UTF-8');
$charset = $this->findCharSet($xml);
if ($charset !== 'UTF-8') {
throw new Reader\Exception('Suspicious Double-encoded XML, spreadsheet file load() aborted to prevent XXE/XEE attacks');
}
Expand All @@ -52,6 +48,22 @@ private function toUtf8(string $xml): string
return $xml;
}

private function findCharSet(string $xml): string
{
$patterns = [
'/encoding\\s*=\\s*"([^"]*]?)"/',
"/encoding\\s*=\\s*'([^']*?)'/",
];

foreach ($patterns as $pattern) {
if (preg_match($pattern, $xml, $matches)) {
return strtoupper($matches[1]);
}
}

return 'UTF-8';
}

/**
* Scan the XML for use of <!ENTITY to prevent XXE/XEE attacks.
*
Expand Down
1 change: 0 additions & 1 deletion src/PhpSpreadsheet/Writer/ZipStream3.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

namespace PhpOffice\PhpSpreadsheet\Writer;

use ZipStream\Option\Archive;
use ZipStream\ZipStream;

class ZipStream3
Expand Down
2 changes: 2 additions & 0 deletions tests/data/Reader/Xml/XEETestInvalidUTF-7-single-quote.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
<?xml version="1.0" encoding='UTF-7' standalone="yes"?>
+ADw-+ACE-DOCTYPE+ACA-foo+ACA-+AFs-+ADw-+ACE-ENTITY+ACA-toreplace+ACA-+ACI-xxe+AF8-test+ACI-+AD4-+ACA-+AF0-+AD4-+AAo-+ADw-sst+ACA-xmlns+AD0-+ACI-http://schemas.openxmlformats.org/spreadsheetml/2006/main+ACI-+ACA-count+AD0-+ACI-2+ACI-+ACA-uniqueCount+AD0-+ACI-1+ACI-+AD4-+ADw-si+AD4-+ADw-t+AD4-+ACY-toreplace+ADs-+ADw-/t+AD4-+ADw-/si+AD4-+ADw-/sst+AD4-
4 changes: 4 additions & 0 deletions tests/data/Reader/Xml/XEETestValidUTF-8-single-quote.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<?xml version='1.0' encoding='UTF-8' standalone='yes'?>
<root>
test: Valid
</root>

0 comments on commit 949ff63

Please sign in to comment.