Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,11 @@ jobs:
run: bin/phpunit --coverage-clover=coverage.xml --coverage-text

- name: Upload coverage to Codecov
uses: codecov/codecov-action@v4
uses: codecov/codecov-action@v5
with:
files: ./coverage.xml
fail_ci_if_error: false
token: ${{ secrets.CODECOV_TOKEN }}
verbose: true

code-style:
Expand Down
28 changes: 21 additions & 7 deletions .scrutinizer.yml
Original file line number Diff line number Diff line change
@@ -1,15 +1,29 @@
# .scrutinizer.yml

filter:
paths:
- 'src/*'
excluded_paths:
- 'tests/*'
- 'vendor/*'

checks:
php:
code_rating: true
duplication: true

build:
image: default-jammy
tests:
override:
-
command: 'XDEBUG_MODE=coverage bin/phpunit --coverage-clover=.coverage'
coverage:
file: '.coverage'
format: 'clover'
nodes:
analysis:
tests:
override:
- php-scrutinizer-run
tests:
tests:
override:
-
command: 'XDEBUG_MODE=coverage bin/phpunit --coverage-clover=.coverage'
coverage:
file: '.coverage'
format: 'clover'
58 changes: 56 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
email-parse
===========

[![Support on Patreon](https://img.shields.io/badge/Patreon-Support%20Me-f96854?logo=patreon)](https://www.patreon.com/cw/MatthewJMucklo)

[![CI](https://github.com/mmucklo/email-parse/workflows/CI/badge.svg)](https://github.com/mmucklo/email-parse/actions)
[![codecov](https://codecov.io/gh/mmucklo/email-parse/branch/master/graph/badge.svg)](https://codecov.io/gh/mmucklo/email-parse)
[![Scrutinizer Code Quality](https://scrutinizer-ci.com/g/mmucklo/email-parse/badges/quality-score.png?b=master)](https://scrutinizer-ci.com/g/mmucklo/email-parse/?branch=master)
Expand All @@ -11,7 +13,7 @@ email-parse

Email\Parse is a multiple (and single) batch email address parser that is reasonably RFC822 / RFC2822 compliant.

It parses a list of 1 to n email addresses separated by space or comma
It parses a list of 1 to n email addresses separated by whitespace or commas by default. The separator characters are configurable via `ParseOptions` (for example, to also accept semicolons).

Installation:
-------------
Expand All @@ -27,12 +29,64 @@ Add this line to your composer.json "require" section:
Usage:
------

### Basic Usage

```php
use Email\Parse;

$result = Parse::getInstance()->parse("a@example.com b@example.com");
```

### Advanced Usage with ParseOptions

You can configure separator behavior and other parsing options using `ParseOptions`:

```php
use Email\Parse;
use Email\ParseOptions;

// Example 1: Use comma and semicolon as separators (default behavior includes whitespace)
$options = new ParseOptions([], [',', ';']);
$parser = new Parse(null, $options);
$result = $parser->parse("a@example.com; b@example.com, c@example.com");

// Example 2: Disable whitespace as separator (only comma and semicolon work)
$options = new ParseOptions([], [',', ';'], false);
$parser = new Parse(null, $options);
$result = $parser->parse("a@example.com; b@example.com"); // Works - uses semicolon
$result = $parser->parse("a@example.com b@example.com"); // Won't split - whitespace not a separator

// Example 3: Names with spaces always work regardless of whitespace separator setting
$options = new ParseOptions([], [',', ';'], false);
$parser = new Parse(null, $options);
$result = $parser->parse("John Doe <john@example.com>, Jane Smith <jane@example.com>");
// Returns 2 valid emails with names preserved
```

#### ParseOptions Constructor

```php
/**
* @param array $bannedChars Array of characters to ban from email addresses (e.g., ['%', '!'])
* @param array $separators Array of separator characters (default: [','])
* @param bool $useWhitespaceAsSeparator Whether to treat whitespace/newlines as separators (default: true)
*/
public function __construct(
array $bannedChars = [],
array $separators = [','],
bool $useWhitespaceAsSeparator = true
)
```

#### Supported Separators

- **Comma (`,`)** - Configured via `$separators` parameter
- **Semicolon (`;`)** - Configured via `$separators` parameter
- **Whitespace (space, tab, newlines)** - Controlled by `$useWhitespaceAsSeparator` parameter
- **Mixed separators** - All configured separators work together seamlessly

**Note:** When `useWhitespaceAsSeparator` is `false`, whitespace is still properly cleaned up and names with spaces (like "John Doe") continue to work correctly.

Notes:
======
This should be RFC 2822 compliant, although it will let a few obsolete RFC 822 addresses through such as `test"test"test@xyz.com` (note the quoted string in the middle of the address, which may be obsolete as of RFC 2822). However, it won't allow escaping outside of quotes such as `test\@test@xyz.com`. This would have to be written as `"test@test"@xyz.com`.
Expand All @@ -51,7 +105,7 @@ how-about-comments(this is a comment!!)@xyz.com
```php
/**
* function parse($emails, $multiple = true, $encoding = 'UTF-8')
* @param string $emails List of Email addresses separated by comma or space if multiple
* @param string $emails List of Email addresses separated by the configured separators (comma and whitespace by default; others such as semicolon via ParseOptions)
* @param bool $multiple (optional, default: true) Whether to parse for multiple email addresses or not
* @param string $encoding (optional, default: 'UTF-8')The encoding if not 'UTF-8'
* @return: see below: */
Expand Down
34 changes: 17 additions & 17 deletions src/Parse.php
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,9 @@ class Parse
protected ?LoggerInterface $logger = null;

/**
* @var ?ParseOptions
* @var ParseOptions
*/
protected ?ParseOptions $options;
protected ParseOptions $options;

/**
* Allow Parse to be instantiated as a singleton.
Expand Down Expand Up @@ -86,9 +86,9 @@ public function setOptions(ParseOptions $options): Parse
}

/**
* @return ?ParseOptions
* @return ParseOptions
*/
public function getOptions(): ?ParseOptions
public function getOptions(): ParseOptions
{
return $this->options;
}
Expand Down Expand Up @@ -276,12 +276,10 @@ public function parse(string $emails, bool $multiple = true, string $encoding =
case self::STATE_SKIP_AHEAD:
// Skip ahead is set when a bad email address is encountered
// It's supposed to skip to the next delimiter and continue parsing from there
if ($multiple &&
(' ' == $curChar ||
"\r" == $curChar ||
"\n" == $curChar ||
"\t" == $curChar ||
',' == $curChar)) {
$isWhitespaceSeparator = $this->options->getUseWhitespaceAsSeparator() &&
(' ' == $curChar || "\r" == $curChar || "\n" == $curChar || "\t" == $curChar);

if ($multiple && ($isWhitespaceSeparator || isset($this->options->getSeparators()[$curChar]))) {
$state = self::STATE_END_ADDRESS;
} else {
$emailAddress['original_address'] .= $curChar;
Expand Down Expand Up @@ -313,7 +311,7 @@ public function parse(string $emails, bool $multiple = true, string $encoding =
// Fall through
// no break
case self::STATE_ADDRESS:
if (',' != $curChar || !$multiple) {
if (!isset($this->options->getSeparators()[$curChar]) || !$multiple) {
$emailAddress['original_address'] .= $curChar;
}

Expand All @@ -323,8 +321,8 @@ public function parse(string $emails, bool $multiple = true, string $encoding =
$commentNestLevel = 1;

break;
} elseif (',' == $curChar) {
// Handle Comma
} elseif (isset($this->options->getSeparators()[$curChar])) {
// Handle separator (comma, semicolon, etc.)
if ($multiple && (self::STATE_DOMAIN == $subState || self::STATE_AFTER_DOMAIN == $subState)) {
// If we're already in the domain part, this should be the end of the address
$state = self::STATE_END_ADDRESS;
Expand All @@ -333,9 +331,9 @@ public function parse(string $emails, bool $multiple = true, string $encoding =
} else {
$emailAddress['invalid'] = true;
if ($multiple || ($i + 5) >= $len) {
$emailAddress['invalid_reason'] = 'Misplaced Comma or missing "@" symbol';
$emailAddress['invalid_reason'] = 'Misplaced separator or missing "@" symbol';
} else {
$emailAddress['invalid_reason'] = 'Comma not permitted - only one email address allowed';
$emailAddress['invalid_reason'] = 'Separator not permitted - only one email address allowed';
}
}
} elseif (' ' == $curChar ||
Expand Down Expand Up @@ -366,8 +364,10 @@ public function parse(string $emails, bool $multiple = true, string $encoding =
$emailAddress['invalid'] = true;
$emailAddress['invalid_reason'] = 'Email Address contains whitespace';
}
} elseif (self::STATE_DOMAIN == $subState || self::STATE_AFTER_DOMAIN == $subState) {
// If we're already in the domain part, this should be the end of the whole address
} elseif ($this->options->getUseWhitespaceAsSeparator() &&
(self::STATE_DOMAIN == $subState || self::STATE_AFTER_DOMAIN == $subState)) {
// If we're already in the domain part and whitespace is a separator,
// this should be the end of the whole address
$state = self::STATE_END_ADDRESS;

break;
Expand Down
47 changes: 45 additions & 2 deletions src/ParseOptions.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,29 @@

class ParseOptions
{
/** @var array<string, bool> */
private array $bannedChars = [];
/** @var array<string, bool> */
private array $separators = [];
private bool $useWhitespaceAsSeparator = true;

public function __construct(array $bannedChars = [])
/**
 * Builds a set of parser options.
 *
 * @param array<string> $bannedChars Characters to ban; handed to setBannedChars() only when the list is non-empty
 * @param array<string> $separators Separator characters; always handed to setSeparators()
 * @param bool $useWhitespaceAsSeparator Stored as the whitespace-separator flag
 */
public function __construct(
    array $bannedChars = [],
    array $separators = [','],
    bool $useWhitespaceAsSeparator = true
) {
    // An empty list leaves the property at its declared default, so the setter is skipped.
    if ([] !== $bannedChars) {
        $this->setBannedChars($bannedChars);
    }

    $this->setSeparators($separators);
    $this->useWhitespaceAsSeparator = $useWhitespaceAsSeparator;
}

/**
* @param array<string> $bannedChars
*/
public function setBannedChars(array $bannedChars): void
{
$this->bannedChars = [];
Expand All @@ -22,10 +36,39 @@ public function setBannedChars(array $bannedChars): void
}

/**
* @return array
* @return array<string, bool>
*/
public function getBannedChars(): array
{
// Returned exactly as stored. The property is annotated array<string, bool>;
// presumably banned character => true, but setBannedChars() is only partly visible here — confirm.
return $this->bannedChars;
}

/**
 * Replaces the configured separators.
 *
 * The list is stored as a lookup map (separator => true) so that membership
 * can be tested with isset(). Any previously configured separators are discarded.
 *
 * @param array<string> $separators
 */
public function setSeparators(array $separators): void
{
    $lookup = [];
    foreach ($separators as $char) {
        $lookup[$char] = true;
    }

    $this->separators = $lookup;
}

/**
 * Returns the configured separators as a lookup map.
 *
 * Keys are the separators passed to setSeparators() and every value is true,
 * so membership can be tested with isset().
 *
 * @return array<string, bool>
 */
public function getSeparators(): array
{
return $this->separators;
}

/**
 * Sets the flag that controls whether whitespace separates addresses.
 *
 * @param bool $useWhitespaceAsSeparator New value of the flag
 */
public function setUseWhitespaceAsSeparator(bool $useWhitespaceAsSeparator): void
{
$this->useWhitespaceAsSeparator = $useWhitespaceAsSeparator;
}

/**
 * Returns the flag that controls whether whitespace separates addresses.
 *
 * @return bool The stored flag; the property is declared with a default of true
 */
public function getUseWhitespaceAsSeparator(): bool
{
return $this->useWhitespaceAsSeparator;
}
}
10 changes: 9 additions & 1 deletion tests/ParseTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
require_once __DIR__.'/../src/Parse.php';

use Email\Parse;
use Email\ParseOptions;

class ParseTest extends \PHPUnit\Framework\TestCase
{
Expand All @@ -18,7 +19,14 @@ public function testParseEmailAddresses()
$multiple = $test['multiple'];
$result = $test['result'];

$this->assertSame($result, Parse::getInstance()->parse($emails, $multiple));
// Check if test specifies use_whitespace_as_separator option
$useWhitespaceAsSeparator = $test['use_whitespace_as_separator'] ?? true;

// Configure Parse to support both comma and semicolon as separators
$options = new ParseOptions(['%', '!'], [',', ';'], $useWhitespaceAsSeparator);
$parser = new Parse(null, $options);

$this->assertSame($result, $parser->parse($emails, $multiple));
}
}
}
Loading