diff --git a/.php-cs-fixer.dist.php b/.php-cs-fixer.dist.php index 3450555d91..84301454fd 100644 --- a/.php-cs-fixer.dist.php +++ b/.php-cs-fixer.dist.php @@ -18,18 +18,17 @@ 'backtick_to_shell_exec' => true, 'binary_operator_spaces' => true, 'blank_line_after_namespace' => true, + 'blank_lines_before_namespace' => ['max_line_breaks' => 2, 'min_line_breaks' => 2], // we want 1 blank line before namespace 'blank_line_after_opening_tag' => true, 'blank_line_before_statement' => true, - 'braces' => true, 'cast_spaces' => true, 'class_attributes_separation' => ['elements' => ['method' => 'one', 'property' => 'one']], // const are often grouped with other related const 'class_definition' => false, // phpcs disagree - 'class_keyword_remove' => false, // Deprecated, and ::class keyword gives us better support in IDE 'combine_consecutive_issets' => true, 'combine_consecutive_unsets' => true, 'combine_nested_dirname' => true, 'comment_to_phpdoc' => false, // interferes with annotations - 'compact_nullable_typehint' => true, + 'compact_nullable_type_declaration' => true, 'concat_space' => ['spacing' => 'one'], 'constant_case' => true, 'date_time_immutable' => false, // Break our unit tests @@ -46,7 +45,6 @@ 'encoding' => true, 'ereg_to_preg' => true, 'error_suppression' => false, // it breaks \PhpOffice\PhpSpreadsheet\Helper\Handler - 'escape_implicit_backslashes' => true, 'explicit_indirect_variable' => false, // I feel it makes the code actually harder to read 'explicit_string_variable' => false, // I feel it makes the code actually harder to read 'final_class' => false, // We need non-final classes @@ -58,7 +56,6 @@ 'fully_qualified_strict_types' => true, 'function_declaration' => true, 'function_to_constant' => true, - 'function_typehint_space' => true, 'general_phpdoc_annotation_remove' => ['annotations' => ['access', 'category', 'copyright']], 'general_phpdoc_tag_rename' => true, 'global_namespace_import' => true, @@ -92,15 +89,14 @@ 'native_constant_invocation' => false, // Micro optimization that look messy 'native_function_casing' => true, 'native_function_invocation' => false, // I suppose this would be best, but I am still unconvinced about the visual aspect of it - 'native_function_type_declaration_casing' => true, - 'new_with_braces' => true, + 'native_type_declaration_casing' => true, + 'new_with_parentheses' => ['anonymous_class' => true, 'named_class' => true], 'no_alias_functions' => true, 'no_alias_language_construct_call' => true, 'no_alternative_syntax' => true, 'no_binary_string' => true, 'no_blank_lines_after_class_opening' => true, 'no_blank_lines_after_phpdoc' => true, - 'no_blank_lines_before_namespace' => false, // we want 1 blank line before namespace 'no_break_comment' => true, 'no_closing_tag' => true, 'no_empty_comment' => true, @@ -120,16 +116,14 @@ 'no_space_around_double_colon' => true, 'no_spaces_after_function_name' => true, 'no_spaces_around_offset' => true, - 'no_spaces_inside_parenthesis' => true, 'no_superfluous_elseif' => false, // Might be risky on a huge code base 'no_superfluous_phpdoc_tags' => ['allow_mixed' => true], - 'no_trailing_comma_in_list_call' => true, - 'no_trailing_comma_in_singleline_array' => true, + 'no_trailing_comma_in_singleline' => ['elements' => ['arguments', 'array_destructuring', 'array', 'group_import']], 'no_trailing_whitespace' => true, 'no_trailing_whitespace_in_comment' => true, 'no_trailing_whitespace_in_string' => false, // Too dangerous 'no_unneeded_control_parentheses' => true, - 'no_unneeded_curly_braces' => true, + 'no_unneeded_braces' => true, 'no_unneeded_final_method' => true, 'no_unreachable_default_argument_value' => true, 'no_unset_cast' => true, @@ -214,21 +208,22 @@ 'simplified_if_return' => false, // Even if technically correct we prefer to be explicit 'simplified_null_return' => false, // Even if technically correct we prefer to be explicit 'single_blank_line_at_eof' => true, - 'single_blank_line_before_namespace' => true, 'single_class_element_per_statement' => true, 'single_import_per_statement' => true, 'single_line_after_imports' => true, 'single_line_comment_style' => true, 'single_line_throw' => false, // I don't see any reason for having a special case for Exception 'single_quote' => true, - 'single_space_after_construct' => true, + 'single_space_around_construct' => true, 'single_trait_insert_per_statement' => true, 'space_after_semicolon' => true, + 'spaces_inside_parentheses' => ['space' => 'none'], 'standardize_increment' => true, 'standardize_not_equals' => true, 'static_lambda' => false, // Risky if we can't guarantee nobody use `bindTo()` 'strict_comparison' => false, // No, too dangerous to change that 'strict_param' => false, // No, too dangerous to change that + 'string_implicit_backslashes' => false, // was escape_implicit_backslashes, too confusing 'string_length_to_empty' => true, 'string_line_ending' => true, 'switch_case_semicolon_to_colon' => true, @@ -239,6 +234,7 @@ 'ternary_to_null_coalescing' => true, 'trailing_comma_in_multiline' => true, 'trim_array_spaces' => true, + 'type_declaration_spaces' => ['elements' => ['function', 'property']], // was function_typehint_space 'types_spaces' => true, 'unary_operator_spaces' => true, 'use_arrow_functions' => true, diff --git a/CHANGELOG.md b/CHANGELOG.md index 4b681af7f2..e2b388b9b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,11 @@ and this project adheres to [Semantic Versioning](https://semver.org). ### Fixed - Backported security patches. +- Change to Csv Reader (see below under Deprecated). Backport of PR #4162 intended for 3.0.0. [Issue #4161](https://github.com/PHPOffice/PhpSpreadsheet/issues/4161) + +### Deprecated + +- Php8.4 will deprecate the escape parameter of fgetcsv. Csv Reader is affected by this; code is changed to be unaffected, but this will mean a breaking change is coming with Php9. Any code which uses the default escape value of backslash will fail in Php9. It is recommended to explicitly set the escape value to null string before then. ## 2024-05-11 - 2.1.0 diff --git a/src/PhpSpreadsheet/Reader/Csv.php b/src/PhpSpreadsheet/Reader/Csv.php index 9d9c26ede2..dd84a7ccc2 100644 --- a/src/PhpSpreadsheet/Reader/Csv.php +++ b/src/PhpSpreadsheet/Reader/Csv.php @@ -62,8 +62,19 @@ class Csv extends BaseReader /** * The character that can escape the enclosure. + * This will probably become unsupported in Php 9. + * Not yet ready to mark deprecated in order to give users + * a migration path. */ - private string $escapeCharacter = '\\'; + private ?string $escapeCharacter = null; + + /** + * The character that will be supplied to fgetcsv + * when escapeCharacter is null. + * It is anticipated that it will conditionally be set + * to null-string for Php9 and above. + */ + private static string $defaultEscapeCharacter = '\\'; /** * Callback for setting defaults in construction. @@ -185,7 +196,7 @@ protected function inferSeparator(): void return; } - $inferenceEngine = new Delimiter($this->fileHandle, $this->escapeCharacter, $this->enclosure); + $inferenceEngine = new Delimiter($this->fileHandle, $this->escapeCharacter ?? self::$defaultEscapeCharacter, $this->enclosure); // If number of lines is 0, nothing to infer : fall back to the default if ($inferenceEngine->linesCounted() === 0) { @@ -228,11 +239,11 @@ public function listWorksheetInfo(string $filename): array $delimiter = $this->delimiter ?? ''; // Loop through each line of the file in turn - $rowData = fgetcsv($fileHandle, 0, $delimiter, $this->enclosure, $this->escapeCharacter); + $rowData = self::getCsv($fileHandle, 0, $delimiter, $this->enclosure, $this->escapeCharacter); while (is_array($rowData)) { ++$worksheetInfo[0]['totalRows']; $worksheetInfo[0]['lastColumnIndex'] = max($worksheetInfo[0]['lastColumnIndex'], count($rowData) - 1); - $rowData = fgetcsv($fileHandle, 0, $delimiter, $this->enclosure, $this->escapeCharacter); + $rowData = self::getCsv($fileHandle, 0, $delimiter, $this->enclosure, $this->escapeCharacter); } $worksheetInfo[0]['lastColumnLetter'] = Coordinate::stringFromColumnIndex($worksheetInfo[0]['lastColumnIndex'] + 1); @@ -379,7 +390,7 @@ private function loadStringOrFile(string $filename, Spreadsheet $spreadsheet, bo // Loop through each line of the file in turn $delimiter = $this->delimiter ?? ''; - $rowData = fgetcsv($fileHandle, 0, $delimiter, $this->enclosure, $this->escapeCharacter); + $rowData = self::getCsv($fileHandle, 0, $delimiter, $this->enclosure, $this->escapeCharacter); $valueBinder = Cell::getValueBinder(); $preserveBooleanString = method_exists($valueBinder, 'getBooleanConversion') && $valueBinder->getBooleanConversion(); $this->getTrue = Calculation::getTRUE(); @@ -416,7 +427,7 @@ private function loadStringOrFile(string $filename, Spreadsheet $spreadsheet, bo } ++$columnLetter; } - $rowData = fgetcsv($fileHandle, 0, $delimiter, $this->enclosure, $this->escapeCharacter); + $rowData = self::getCsv($fileHandle, 0, $delimiter, $this->enclosure, $this->escapeCharacter); ++$currentRow; } @@ -527,6 +538,11 @@ public function getContiguous(): bool return $this->contiguous; } + /** + * Php9 intends to drop support for this parameter in fgetcsv. + * Not yet ready to mark deprecated in order to give users + * a migration path. + */ public function setEscapeCharacter(string $escapeCharacter): self { $this->escapeCharacter = $escapeCharacter; @@ -536,7 +552,7 @@ public function setEscapeCharacter(string $escapeCharacter): self public function getEscapeCharacter(): string { - return $this->escapeCharacter; + return $this->escapeCharacter ?? self::$defaultEscapeCharacter; } /** @@ -648,4 +664,28 @@ public function setSheetNameIsFileName(bool $sheetNameIsFileName): self return $this; } + + /** + * Php8.4 deprecates use of anything other than null string + * as escape Character. + * + * @param resource $stream + * @param null|int<0, max> $length + * + * @return array|false + */ + private static function getCsv( + $stream, + ?int $length = null, + string $separator = ',', + string $enclosure = '"', + ?string $escape = null + ): array|false { + $escape = $escape ?? self::$defaultEscapeCharacter; + if (PHP_VERSION_ID >= 80400 && $escape !== '') { + return @fgetcsv($stream, $length, $separator, $enclosure, $escape); + } + + return fgetcsv($stream, $length, $separator, $enclosure, $escape); + } } diff --git a/src/PhpSpreadsheet/Reader/Security/XmlScanner.php b/src/PhpSpreadsheet/Reader/Security/XmlScanner.php index 979325a44c..362a70c127 100644 --- a/src/PhpSpreadsheet/Reader/Security/XmlScanner.php +++ b/src/PhpSpreadsheet/Reader/Security/XmlScanner.php @@ -35,15 +35,11 @@ private static function forceString(mixed $arg): string private function toUtf8(string $xml): string { - $pattern = '/encoding="(.*?)"/'; - $result = preg_match($pattern, $xml, $matches); - $charset = strtoupper($result ? $matches[1] : 'UTF-8'); - + $charset = $this->findCharSet($xml); if ($charset !== 'UTF-8') { $xml = self::forceString(mb_convert_encoding($xml, 'UTF-8', $charset)); - $result = preg_match($pattern, $xml, $matches); - $charset = strtoupper($result ? $matches[1] : 'UTF-8'); + $charset = $this->findCharSet($xml); if ($charset !== 'UTF-8') { throw new Reader\Exception('Suspicious Double-encoded XML, spreadsheet file load() aborted to prevent XXE/XEE attacks'); } @@ -52,6 +48,22 @@ private function toUtf8(string $xml): string return $xml; } + private function findCharSet(string $xml): string + { + $patterns = [ + '/encoding\\s*=\\s*"([^"]*]?)"/', + "/encoding\\s*=\\s*'([^']*?)'/", + ]; + + foreach ($patterns as $pattern) { + if (preg_match($pattern, $xml, $matches)) { + return strtoupper($matches[1]); + } + } + + return 'UTF-8'; + } + /** * Scan the XML for use of + +ADw-+ACE-DOCTYPE+ACA-foo+ACA-+AFs-+ADw-+ACE-ENTITY+ACA-toreplace+ACA-+ACI-xxe+AF8-test+ACI-+AD4-+ACA-+AF0-+AD4-+AAo-+ADw-sst+ACA-xmlns+AD0-+ACI-http://schemas.openxmlformats.org/spreadsheetml/2006/main+ACI-+ACA-count+AD0-+ACI-2+ACI-+ACA-uniqueCount+AD0-+ACI-1+ACI-+AD4-+ADw-si+AD4-+ADw-t+AD4-+ACY-toreplace+ADs-+ADw-/t+AD4-+ADw-/si+AD4-+ADw-/sst+AD4- diff --git a/tests/data/Reader/Xml/XEETestValidUTF-8-single-quote.xml b/tests/data/Reader/Xml/XEETestValidUTF-8-single-quote.xml new file mode 100644 index 0000000000..e478c7d408 --- /dev/null +++ b/tests/data/Reader/Xml/XEETestValidUTF-8-single-quote.xml @@ -0,0 +1,4 @@ + + + test: Valid +