Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,8 @@ bin/*
coverage/
coverage.xml
.phpunit.cache/

# infection artifacts
infection.log
infection-summary.log
.infection-tmp/
6 changes: 3 additions & 3 deletions ROADMAP.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,17 +81,17 @@ Non-breaking follow-on to v3.2.
Not tied to a specific release; picked up as time allows.

**Testing depth:**
- [ ] Mutation testing with Infection. Surfaces tests whose assertions are too weak to catch small code mutations. Target ≥85% MSI (mutation score indicator).
- [~] Mutation testing with Infection — wired in via `composer infect` with thresholds `minMsi=74`, `minCoveredMsi=79` (current baseline). Target remains ≥85% MSI / ≥85% covered MSI; raise thresholds as tests are strengthened.
- [ ] Property-based testing (Eris or Pest plugin): generate random valid addresses, assert `parseSingle(parseSingle($x)->simpleAddress)` round-trips; perturb bytes and assert error codes.
- [ ] Parse.php line coverage 86.69% → ≥95% — remaining gaps are obscure error branches and the "shouldn't ever get here" default case.
- [ ] CI matrix: add PHP 8.5 once released.

**Static analysis:**
- [ ] PHPStan level 6 → 8 (or `max`) — tighter generics and inference on the state machine. Likely requires additional docblock array shapes.
- [x] PHPStan level 6 → 8 — tighter generics and inference; required four small nullable-return guards (`idn_to_ascii`, `mb_split`, `file_get_contents`) and one local docblock shape on `parseMultiple()`.
- [ ] Add Psalm alongside PHPStan for cross-tool coverage; keep both green.

**Performance:**
- [ ] PhpBench suite: parsing throughput for realistic inputs (single ASCII, multi-address batch, UTF-8, IDN, obs-route). Establishes a baseline before any optimization.
- [x] PhpBench suite — `benchmarks/ParseBench.php` covers single ASCII, name-addr, UTF-8 local-part, IDN, obs-route, 10-address comma batch, 100-address `parseStream` batch, invalid inputs, and comment extraction. Run with `composer bench`.
- [ ] Profile the state machine under mailing-list-sized inputs. Likely hot path: `mb_substr` in the main loop — investigate byte iteration for pure-ASCII inputs.

**Community / documentation:**
Expand Down
108 changes: 108 additions & 0 deletions benchmarks/ParseBench.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
<?php

namespace Email\Benchmarks;

use Email\Parse;
use Email\ParseOptions;

/**
* Baseline performance benchmarks for Email\Parse.
*
* Run with:
* composer bench
*
* Numbers are reported as time-per-iteration (see PhpBench `Mo` reports).
* Use these to catch regressions across releases; absolute throughput
* depends on the host PHP version and hardware.
*
* @BeforeMethods({"setUp"})
* @Revs(1000)
* @Iterations(3)
* @Warmup(1)
*/
class ParseBench
{
private Parse $legacy;
private Parse $rfc5322;
private Parse $rfc6531;

/** @var array<string> */
private array $batch1000;

public function setUp(): void
{
$this->legacy = new Parse();
$this->rfc5322 = new Parse(null, ParseOptions::rfc5322());
$this->rfc6531 = new Parse(null, ParseOptions::rfc6531()->withRequireFqdn(false));

// Realistic mailing-list batch: 1000 addresses with a mix of forms.
$this->batch1000 = [];
for ($i = 0; $i < 1000; ++$i) {
$this->batch1000[] = "user{$i}@example.com";
}
}

/** @Subject */
public function benchSimpleAsciiAddress(): void
{
$this->legacy->parseSingle('user@example.com');
}

/** @Subject */
public function benchSimpleAsciiAddressArrayApi(): void
{
$this->legacy->parse('user@example.com', false);
}

/** @Subject */
public function benchNameAddr(): void
{
$this->legacy->parseSingle('"John Q. Public" <john@example.com>');
}

/** @Subject */
public function benchUtf8LocalPart(): void
{
$this->rfc6531->parseSingle('müller@example.com');
}

/** @Subject */
public function benchIdnDomain(): void
{
$this->rfc6531->parseSingle('user@münchen.de');
}

/** @Subject */
public function benchObsRoute(): void
{
$this->rfc5322->parseSingle('<@hostA,@hostB:user@hostC>');
}

/** @Subject */
public function benchBatch10Comma(): void
{
$this->legacy->parseMultiple('a@a.com, b@b.com, c@c.com, d@d.com, e@e.com, f@f.com, g@g.com, h@h.com, i@i.com, j@j.com');
}

/** @Subject */
public function benchBatch100StreamCount(): void
{
$batch = array_slice($this->batch1000, 0, 100);
$n = 0;
foreach ($this->legacy->parseStream($batch) as $_) {
++$n;
}
}

/** @Subject */
public function benchInvalidAddress(): void
{
$this->legacy->parseSingle('not-an-email');
}

/** @Subject */
public function benchCommentExtraction(): void
{
$this->legacy->parseSingle('user (the main account)@example.com (home)');
}
}
14 changes: 11 additions & 3 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@
"friendsofphp/php-cs-fixer": "^3.65",
"phpstan/phpstan": "^2.0",
"phpunit/phpunit": "^9.6",
"symfony/yaml": "^6.4|^7.2"
"symfony/yaml": "^6.4|^7.2",
"infection/infection": "^0.29.8",
"phpbench/phpbench": "^1.4"
},
"require": {
"php": "^8.1",
Expand All @@ -40,11 +42,15 @@
}
},
"config": {
"bin-dir": "bin"
"bin-dir": "bin",
"allow-plugins": {
"infection/extension-installer": true
}
},
"autoload-dev": {
"psr-4": {
"Email\\Tests\\": "tests/"
"Email\\Tests\\": "tests/",
"Email\\Benchmarks\\": "benchmarks/"
}
},
"scripts": {
Expand All @@ -53,6 +59,8 @@
"cs:check": "php-cs-fixer fix --dry-run --diff",
"cs:fix": "php-cs-fixer fix",
"stan": "phpstan analyse --memory-limit=512M",
"infect": "XDEBUG_MODE=coverage infection --threads=max",
"bench": "phpbench run --report=default",
"ci": [
"@cs:check",
"@stan",
Expand Down
28 changes: 28 additions & 0 deletions infection.json5
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{
// Infection mutation-testing configuration.
//
// Usage: `composer infect` (or `bin/infection`) runs the full mutation suite
// against the PHPUnit suite and reports the Mutation Score Indicator (MSI).
// MSI is the percentage of mutations that caused at least one test to fail.
//
// Thresholds are set just below the current baseline (74% MSI / 79%
// covered MSI) so future regressions fail CI, while the full 85% target
// is an ongoing aspiration tracked in ROADMAP.md. Raise these numbers as
// the test suite improves.
"$schema": "vendor/infection/infection/resources/schema.json",
"source": {
"directories": ["src"]
},
"logs": {
"text": "infection.log",
"summary": "infection-summary.log"
},
"tmpDir": ".infection-tmp",
"mutators": {
"@default": true
},
"minMsi": 74,
"minCoveredMsi": 79,
"timeout": 10,
"testFramework": "phpunit"
}
9 changes: 9 additions & 0 deletions phpbench.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"$schema": "vendor/phpbench/phpbench/phpbench.schema.json",
"runner.bootstrap": "autoload.php.dist",
"runner.path": "benchmarks",
"runner.file_pattern": "*Bench.php",
"runner.iterations": 3,
"runner.revs": 1000,
"runner.warmup": 1
}
2 changes: 1 addition & 1 deletion phpstan.neon
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ includes:
- phpstan-baseline.neon

parameters:
level: 6
level: 8
paths:
- src
- tests
Expand Down
13 changes: 11 additions & 2 deletions src/Parse.php
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,10 @@ public function parseSingle(string $email, string $encoding = 'UTF-8'): ParsedEm
*/
public function parseMultiple(string $emails, string $encoding = 'UTF-8'): ParseResult
{
return ParseResult::fromArray($this->parse($emails, true, $encoding));
/** @var array{success: bool, reason: ?string, email_addresses: array<int, array<string, mixed>>} $raw */
$raw = $this->parse($emails, true, $encoding);

return ParseResult::fromArray($raw);
}

/**
Expand Down Expand Up @@ -617,7 +620,8 @@ public function parse(string $emails, bool $multiple = true, string $encoding =
try {
// Test by trying to encode the current character into Punycode
// Punycode should match the traditional domain name subset of characters
if (preg_match('/[a-z0-9\-]/', idn_to_ascii($curChar))) {
$punycoded = idn_to_ascii($curChar);
if ($punycoded !== false && preg_match('/[a-z0-9\-]/', $punycoded)) {
$emailAddress['domain'] .= $curChar;
} else {
$emailAddress['invalid'] = true;
Expand Down Expand Up @@ -1384,6 +1388,11 @@ protected function validateDomainName(string $domain, string $encoding = 'UTF-8'
if ($origEncoding) {
mb_regex_encoding($origEncoding);
}
// mb_split() can return false on failure; treat that as a validation
// failure rather than iterating over a bogus value.
if ($parts === false) {
return ['valid' => false, 'reason' => 'Domain name could not be tokenized', 'code' => Err::DomainInvalid];
}
$maxLabelLen = $this->options->getLengthLimits()->maxDomainLabelLength;
foreach ($parts as $part) {
if (strlen($part) > $maxLabelLen) {
Expand Down
90 changes: 86 additions & 4 deletions tests/ParseTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,9 @@ private function buildOptions(array $test): ParseOptions

public function testParseEmailAddresses()
{
$tests = \Symfony\Component\Yaml\Yaml::parse(file_get_contents(__DIR__.'/testspec.yml'));
$yaml = file_get_contents(__DIR__.'/testspec.yml');
$this->assertNotFalse($yaml, 'testspec.yml must be readable');
$tests = \Symfony\Component\Yaml\Yaml::parse($yaml);

foreach ($tests as $testIndex => $test) {
$emails = $test['emails'];
Expand Down Expand Up @@ -467,13 +469,63 @@ public function testPolicyFailureIsWarningSeverity(): void
$this->assertSame(\Email\ValidationSeverity::Warning, $result->invalidSeverity());
}

public function testLengthLimitsDefaultsMatchRfc(): void
{
// RFC 5321 §4.5.3.1: 64-octet local-part, RFC 3696 EID 1690: 254-octet
// total, RFC 1035 §2.3.4: 63-octet domain label. Assert the exact values
// so mutations to the constructor defaults or preset factories are caught.
$d = \Email\LengthLimits::createDefault();
$this->assertSame(64, $d->maxLocalPartLength);
$this->assertSame(254, $d->maxTotalLength);
$this->assertSame(63, $d->maxDomainLabelLength);

$r = \Email\LengthLimits::createRelaxed();
$this->assertSame(128, $r->maxLocalPartLength);
$this->assertSame(512, $r->maxTotalLength);
$this->assertSame(128, $r->maxDomainLabelLength);

// Default constructor without args matches createDefault().
$empty = new \Email\LengthLimits();
$this->assertSame(64, $empty->maxLocalPartLength);
$this->assertSame(254, $empty->maxTotalLength);
$this->assertSame(63, $empty->maxDomainLabelLength);
}

public function testEveryErrorCodeHasASeverity(): void
{
// Defensive: ensure no new ParseErrorCode is added without mapping its severity.
// Explicit mapping assertion — codes classified as Warning are structural
// violations that callers may reasonably accept in non-SMTP contexts;
// everything else is Critical. Adding a new ParseErrorCode without updating
// this mapping should fail the last assertion below.
$warning = [
\Email\ParseErrorCode::Utf8NotAllowedInLocalPart,
\Email\ParseErrorCode::C0ControlInLocalPart,
\Email\ParseErrorCode::C1ControlInLocalPart,
\Email\ParseErrorCode::C1ControlInQuotedString,
\Email\ParseErrorCode::EmptyQuotedLocalPart,
\Email\ParseErrorCode::FqdnRequired,
\Email\ParseErrorCode::IpNotInGlobalRange,
\Email\ParseErrorCode::Ipv6NotInGlobalRange,
\Email\ParseErrorCode::LocalPartTooLong,
\Email\ParseErrorCode::TotalLengthExceeded,
\Email\ParseErrorCode::DomainTooLong,
\Email\ParseErrorCode::DomainLabelTooLong,
\Email\ParseErrorCode::PunycodeConversionFailed,
];

foreach (\Email\ParseErrorCode::cases() as $code) {
$severity = $code->severity();
$this->assertInstanceOf(\Email\ValidationSeverity::class, $severity);
$expected = in_array($code, $warning, true)
? \Email\ValidationSeverity::Warning
: \Email\ValidationSeverity::Critical;
$this->assertSame(
$expected,
$code->severity(),
"{$code->name} should be {$expected->value}",
);
}

// Every Warning entry must be a real case (catches typos in the list above).
$this->assertCount(13, $warning);
}

public function testParseStreamYieldsTypedObjects(): void
Expand Down Expand Up @@ -706,6 +758,23 @@ public function testToJsonSerializesErrorCodeAsString(): void
$this->assertSame('multiple_opening_angle', $decoded['invalid_reason_code']);
}

public function testToJsonEmitsUnescapedUnicode(): void
{
// Asserts JSON_UNESCAPED_UNICODE is in the flag set — without it, "münchen"
// would become "m\u00fcnchen". Catches bitwise-or regressions in toJson().
$typed = Parse::getInstance()->parseSingle('user@münchen.de');
$this->assertStringContainsString('münchen', $typed->toJson());
$this->assertStringNotContainsString('\u00', $typed->toJson());
}

public function testToJsonPassesCallerFlagsThrough(): void
{
// Caller-supplied flags must reach json_encode (bitwise-or, not &).
$typed = Parse::getInstance()->parseSingle('user@example.com');
$pretty = $typed->toJson(JSON_PRETTY_PRINT);
$this->assertStringContainsString("\n", $pretty);
}

public function testStringableReturnsSimpleAddressWhenValid(): void
{
$typed = Parse::getInstance()->parseSingle('"J Doe" <john@example.com>');
Expand Down Expand Up @@ -777,6 +846,19 @@ public function testParseResultToJsonProducesParseableJson(): void
$this->assertSame('a', $decoded['email_addresses'][0]['local_part']);
}

public function testParseResultToJsonEmitsUnescapedUnicode(): void
{
$typed = Parse::getInstance()->parseMultiple('user@münchen.de');
$this->assertStringContainsString('münchen', $typed->toJson());
$this->assertStringNotContainsString('\u00', $typed->toJson());
}

public function testParseResultToJsonPassesCallerFlagsThrough(): void
{
$typed = Parse::getInstance()->parseMultiple('a@a.com, b@b.com');
$this->assertStringContainsString("\n", $typed->toJson(JSON_PRETTY_PRINT));
}

public function testLocalPartNormalizerRewritesLocalPart(): void
{
// Gmail-style: strip dots and +tags from the local-part for gmail.com.
Expand Down
Loading