diff --git a/.gitignore b/.gitignore index 8144fd8..07a7323 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,8 @@ bin/* coverage/ coverage.xml .phpunit.cache/ + +# infection artifacts +infection.log +infection-summary.log +.infection-tmp/ diff --git a/ROADMAP.md b/ROADMAP.md index accefc9..a10b25d 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -81,17 +81,17 @@ Non-breaking follow-on to v3.2. Not tied to a specific release; picked up as time allows. **Testing depth:** -- [ ] Mutation testing with Infection. Surfaces tests whose assertions are too weak to catch small code mutations. Target ≥85% MSI (mutation score indicator). +- [~] Mutation testing with Infection — wired in via `composer infect` with thresholds `minMsi=74`, `minCoveredMsi=79` (current baseline). Target remains ≥85% MSI / ≥85% covered MSI; raise thresholds as tests are strengthened. - [ ] Property-based testing (Eris or Pest plugin): generate random valid addresses, assert `parseSingle(parseSingle($x)->simpleAddress)` round-trips; perturb bytes and assert error codes. - [ ] Parse.php line coverage 86.69% → ≥95% — remaining gaps are obscure error branches and the "shouldn't ever get here" default case. - [ ] CI matrix: add PHP 8.5 once released. **Static analysis:** -- [ ] PHPStan level 6 → 8 (or `max`) — tighter generics and inference on the state machine. Likely requires additional docblock array shapes. +- [x] PHPStan level 6 → 8 — tighter generics and inference; required four small nullable-return guards (`idn_to_ascii`, `mb_split`, `file_get_contents`) and one local docblock shape on `parseMultiple()`. - [ ] Add Psalm alongside PHPStan for cross-tool coverage; keep both green. **Performance:** -- [ ] PhpBench suite: parsing throughput for realistic inputs (single ASCII, multi-address batch, UTF-8, IDN, obs-route). Establishes a baseline before any optimization. +- [x] PhpBench suite — `benchmarks/ParseBench.php` covers single ASCII, name-addr, UTF-8 local-part, IDN, obs-route, 10-address comma batch, 100-address `parseStream` batch, invalid inputs, and comment extraction. Run with `composer bench`. - [ ] Profile the state machine under mailing-list-sized inputs. Likely hot path: `mb_substr` in the main loop — investigate byte iteration for pure-ASCII inputs. **Community / documentation:** diff --git a/benchmarks/ParseBench.php b/benchmarks/ParseBench.php new file mode 100644 index 0000000..4c7b0f3 --- /dev/null +++ b/benchmarks/ParseBench.php @@ -0,0 +1,108 @@ + */ + private array $batch1000; + + public function setUp(): void + { + $this->legacy = new Parse(); + $this->rfc5322 = new Parse(null, ParseOptions::rfc5322()); + $this->rfc6531 = new Parse(null, ParseOptions::rfc6531()->withRequireFqdn(false)); + + // Realistic mailing-list batch: 1000 addresses with a mix of forms. + $this->batch1000 = []; + for ($i = 0; $i < 1000; ++$i) { + $this->batch1000[] = "user{$i}@example.com"; + } + } + + /** @Subject */ + public function benchSimpleAsciiAddress(): void + { + $this->legacy->parseSingle('user@example.com'); + } + + /** @Subject */ + public function benchSimpleAsciiAddressArrayApi(): void + { + $this->legacy->parse('user@example.com', false); + } + + /** @Subject */ + public function benchNameAddr(): void + { + $this->legacy->parseSingle('"John Q. Public" '); + } + + /** @Subject */ + public function benchUtf8LocalPart(): void + { + $this->rfc6531->parseSingle('müller@example.com'); + } + + /** @Subject */ + public function benchIdnDomain(): void + { + $this->rfc6531->parseSingle('user@münchen.de'); + } + + /** @Subject */ + public function benchObsRoute(): void + { + $this->rfc5322->parseSingle('<@hostA,@hostB:user@hostC>'); + } + + /** @Subject */ + public function benchBatch10Comma(): void + { + $this->legacy->parseMultiple('a@a.com, b@b.com, c@c.com, d@d.com, e@e.com, f@f.com, g@g.com, h@h.com, i@i.com, j@j.com'); + } + + /** @Subject */ + public function benchBatch100StreamCount(): void + { + $batch = array_slice($this->batch1000, 0, 100); + $n = 0; + foreach ($this->legacy->parseStream($batch) as $_) { + ++$n; + } + } + + /** @Subject */ + public function benchInvalidAddress(): void + { + $this->legacy->parseSingle('not-an-email'); + } + + /** @Subject */ + public function benchCommentExtraction(): void + { + $this->legacy->parseSingle('user (the main account)@example.com (home)'); + } +} diff --git a/composer.json b/composer.json index 27779f5..68d035d 100644 --- a/composer.json +++ b/composer.json @@ -26,7 +26,9 @@ "friendsofphp/php-cs-fixer": "^3.65", "phpstan/phpstan": "^2.0", "phpunit/phpunit": "^9.6", - "symfony/yaml": "^6.4|^7.2" + "symfony/yaml": "^6.4|^7.2", + "infection/infection": "^0.29.8", + "phpbench/phpbench": "^1.4" }, "require": { "php": "^8.1", @@ -40,11 +42,15 @@ } }, "config": { - "bin-dir": "bin" + "bin-dir": "bin", + "allow-plugins": { + "infection/extension-installer": true + } }, "autoload-dev": { "psr-4": { - "Email\\Tests\\": "tests/" + "Email\\Tests\\": "tests/", + "Email\\Benchmarks\\": "benchmarks/" } }, "scripts": { @@ -53,6 +59,8 @@ "cs:check": "php-cs-fixer fix --dry-run --diff", "cs:fix": "php-cs-fixer fix", "stan": "phpstan analyse --memory-limit=512M", + "infect": "XDEBUG_MODE=coverage infection --threads=max", + "bench": "phpbench run --report=default", "ci": [ "@cs:check", "@stan", diff --git a/infection.json5 b/infection.json5 new file mode 100644 index 0000000..ac80832 --- /dev/null +++ b/infection.json5 @@ -0,0 +1,28 @@ +{ + // Infection mutation-testing configuration. + // + // Usage: `composer infect` (or `bin/infection`) runs the full mutation suite + // against the PHPUnit suite and reports the Mutation Score Indicator (MSI). + // MSI is the percentage of mutations that caused at least one test to fail. + // + // Thresholds are set just below the current baseline (74% MSI / 79% + // covered MSI) so future regressions fail CI, while the full 85% target + // is an ongoing aspiration tracked in ROADMAP.md. Raise these numbers as + // the test suite improves. + "$schema": "vendor/infection/infection/resources/schema.json", + "source": { + "directories": ["src"] + }, + "logs": { + "text": "infection.log", + "summary": "infection-summary.log" + }, + "tmpDir": ".infection-tmp", + "mutators": { + "@default": true + }, + "minMsi": 74, + "minCoveredMsi": 79, + "timeout": 10, + "testFramework": "phpunit" +} diff --git a/phpbench.json b/phpbench.json new file mode 100644 index 0000000..7980d04 --- /dev/null +++ b/phpbench.json @@ -0,0 +1,9 @@ +{ + "$schema": "vendor/phpbench/phpbench/phpbench.schema.json", + "runner.bootstrap": "autoload.php.dist", + "runner.path": "benchmarks", + "runner.file_pattern": "*Bench.php", + "runner.iterations": 3, + "runner.revs": 1000, + "runner.warmup": 1 +} diff --git a/phpstan.neon b/phpstan.neon index 6fa1268..9cf31c4 100644 --- a/phpstan.neon +++ b/phpstan.neon @@ -2,7 +2,7 @@ includes: - phpstan-baseline.neon parameters: - level: 6 + level: 8 paths: - src - tests diff --git a/src/Parse.php b/src/Parse.php index 0486968..b8da850 100644 --- a/src/Parse.php +++ b/src/Parse.php @@ -229,7 +229,10 @@ public function parseSingle(string $email, string $encoding = 'UTF-8'): ParsedEm */ public function parseMultiple(string $emails, string $encoding = 'UTF-8'): ParseResult { - return ParseResult::fromArray($this->parse($emails, true, $encoding)); + /** @var array{success: bool, reason: ?string, email_addresses: array>} $raw */ + $raw = $this->parse($emails, true, $encoding); + + return ParseResult::fromArray($raw); } /** @@ -617,7 +620,8 @@ public function parse(string $emails, bool $multiple = true, string $encoding = try { // Test by trying to encode the current character into Punycode // Punycode should match the traditional domain name subset of characters - if (preg_match('/[a-z0-9\-]/', idn_to_ascii($curChar))) { + $punycoded = idn_to_ascii($curChar); + if ($punycoded !== false && preg_match('/[a-z0-9\-]/', $punycoded)) { $emailAddress['domain'] .= $curChar; } else { $emailAddress['invalid'] = true; @@ -1384,6 +1388,11 @@ protected function validateDomainName(string $domain, string $encoding = 'UTF-8' if ($origEncoding) { mb_regex_encoding($origEncoding); } + // mb_split() can return false on failure; treat that as a validation + // failure rather than iterating over a bogus value. + if ($parts === false) { + return ['valid' => false, 'reason' => 'Domain name could not be tokenized', 'code' => Err::DomainInvalid]; + } $maxLabelLen = $this->options->getLengthLimits()->maxDomainLabelLength; foreach ($parts as $part) { if (strlen($part) > $maxLabelLen) { diff --git a/tests/ParseTest.php b/tests/ParseTest.php index 70194e2..9376e0b 100644 --- a/tests/ParseTest.php +++ b/tests/ParseTest.php @@ -107,7 +107,9 @@ private function buildOptions(array $test): ParseOptions public function testParseEmailAddresses() { - $tests = \Symfony\Component\Yaml\Yaml::parse(file_get_contents(__DIR__.'/testspec.yml')); + $yaml = file_get_contents(__DIR__.'/testspec.yml'); + $this->assertNotFalse($yaml, 'testspec.yml must be readable'); + $tests = \Symfony\Component\Yaml\Yaml::parse($yaml); foreach ($tests as $testIndex => $test) { $emails = $test['emails']; @@ -467,13 +469,63 @@ public function testPolicyFailureIsWarningSeverity(): void $this->assertSame(\Email\ValidationSeverity::Warning, $result->invalidSeverity()); } + public function testLengthLimitsDefaultsMatchRfc(): void + { + // RFC 5321 §4.5.3.1: 64-octet local-part, RFC 3696 EID 1690: 254-octet + // total, RFC 1035 §2.3.4: 63-octet domain label. Assert the exact values + // so mutations to the constructor defaults or preset factories are caught. + $d = \Email\LengthLimits::createDefault(); + $this->assertSame(64, $d->maxLocalPartLength); + $this->assertSame(254, $d->maxTotalLength); + $this->assertSame(63, $d->maxDomainLabelLength); + + $r = \Email\LengthLimits::createRelaxed(); + $this->assertSame(128, $r->maxLocalPartLength); + $this->assertSame(512, $r->maxTotalLength); + $this->assertSame(128, $r->maxDomainLabelLength); + + // Default constructor without args matches createDefault(). + $empty = new \Email\LengthLimits(); + $this->assertSame(64, $empty->maxLocalPartLength); + $this->assertSame(254, $empty->maxTotalLength); + $this->assertSame(63, $empty->maxDomainLabelLength); + } + public function testEveryErrorCodeHasASeverity(): void { - // Defensive: ensure no new ParseErrorCode is added without mapping its severity. + // Explicit mapping assertion — codes classified as Warning are structural + // violations that callers may reasonably accept in non-SMTP contexts; + // everything else is Critical. Adding a new ParseErrorCode without updating + // this mapping should fail the last assertion below. + $warning = [ + \Email\ParseErrorCode::Utf8NotAllowedInLocalPart, + \Email\ParseErrorCode::C0ControlInLocalPart, + \Email\ParseErrorCode::C1ControlInLocalPart, + \Email\ParseErrorCode::C1ControlInQuotedString, + \Email\ParseErrorCode::EmptyQuotedLocalPart, + \Email\ParseErrorCode::FqdnRequired, + \Email\ParseErrorCode::IpNotInGlobalRange, + \Email\ParseErrorCode::Ipv6NotInGlobalRange, + \Email\ParseErrorCode::LocalPartTooLong, + \Email\ParseErrorCode::TotalLengthExceeded, + \Email\ParseErrorCode::DomainTooLong, + \Email\ParseErrorCode::DomainLabelTooLong, + \Email\ParseErrorCode::PunycodeConversionFailed, + ]; + foreach (\Email\ParseErrorCode::cases() as $code) { - $severity = $code->severity(); - $this->assertInstanceOf(\Email\ValidationSeverity::class, $severity); + $expected = in_array($code, $warning, true) + ? \Email\ValidationSeverity::Warning + : \Email\ValidationSeverity::Critical; + $this->assertSame( + $expected, + $code->severity(), + "{$code->name} should be {$expected->value}", + ); } + + // Every Warning entry must be a real case (catches typos in the list above). + $this->assertCount(13, $warning); } public function testParseStreamYieldsTypedObjects(): void @@ -706,6 +758,23 @@ public function testToJsonSerializesErrorCodeAsString(): void $this->assertSame('multiple_opening_angle', $decoded['invalid_reason_code']); } + public function testToJsonEmitsUnescapedUnicode(): void + { + // Asserts JSON_UNESCAPED_UNICODE is in the flag set — without it, "münchen" + // would become "m\u00fcnchen". Catches bitwise-or regressions in toJson(). + $typed = Parse::getInstance()->parseSingle('user@münchen.de'); + $this->assertStringContainsString('münchen', $typed->toJson()); + $this->assertStringNotContainsString('\u00', $typed->toJson()); + } + + public function testToJsonPassesCallerFlagsThrough(): void + { + // Caller-supplied flags must reach json_encode (bitwise-or, not &). + $typed = Parse::getInstance()->parseSingle('user@example.com'); + $pretty = $typed->toJson(JSON_PRETTY_PRINT); + $this->assertStringContainsString("\n", $pretty); + } + public function testStringableReturnsSimpleAddressWhenValid(): void { $typed = Parse::getInstance()->parseSingle('"J Doe" '); @@ -777,6 +846,19 @@ public function testParseResultToJsonProducesParseableJson(): void $this->assertSame('a', $decoded['email_addresses'][0]['local_part']); } + public function testParseResultToJsonEmitsUnescapedUnicode(): void + { + $typed = Parse::getInstance()->parseMultiple('user@münchen.de'); + $this->assertStringContainsString('münchen', $typed->toJson()); + $this->assertStringNotContainsString('\u00', $typed->toJson()); + } + + public function testParseResultToJsonPassesCallerFlagsThrough(): void + { + $typed = Parse::getInstance()->parseMultiple('a@a.com, b@b.com'); + $this->assertStringContainsString("\n", $typed->toJson(JSON_PRETTY_PRINT)); + } + public function testLocalPartNormalizerRewritesLocalPart(): void { // Gmail-style: strip dots and +tags from the local-part for gmail.com.