diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 33edbbe..9930a50 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -72,10 +72,11 @@ jobs: run: bin/phpunit --coverage-clover=coverage.xml --coverage-text - name: Upload coverage to Codecov - uses: codecov/codecov-action@v4 + uses: codecov/codecov-action@v5 with: files: ./coverage.xml fail_ci_if_error: false + token: ${{ secrets.CODECOV_TOKEN }} verbose: true code-style: diff --git a/.scrutinizer.yml b/.scrutinizer.yml index ef8de24..53d3598 100644 --- a/.scrutinizer.yml +++ b/.scrutinizer.yml @@ -1,15 +1,29 @@ # .scrutinizer.yml +filter: + paths: + - 'src/*' + excluded_paths: + - 'tests/*' + - 'vendor/*' + checks: php: code_rating: true duplication: true + build: image: default-jammy - tests: - override: - - - command: 'XDEBUG_MODE=coverage bin/phpunit --coverage-clover=.coverage' - coverage: - file: '.coverage' - format: 'clover' + nodes: + analysis: + tests: + override: + - php-scrutinizer-run + tests: + tests: + override: + - + command: 'XDEBUG_MODE=coverage bin/phpunit --coverage-clover=.coverage' + coverage: + file: '.coverage' + format: 'clover' diff --git a/README.md b/README.md index 19d0338..650382d 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,8 @@ email-parse =========== +[![Support on Patreon](https://img.shields.io/badge/Patreon-Support%20Me-f96854?logo=patreon)](https://www.patreon.com/cw/MatthewJMucklo) + [![CI](https://github.com/mmucklo/email-parse/workflows/CI/badge.svg)](https://github.com/mmucklo/email-parse/actions) [![codecov](https://codecov.io/gh/mmucklo/email-parse/branch/master/graph/badge.svg)](https://codecov.io/gh/mmucklo/email-parse) [![Scrutinizer Code Quality](https://scrutinizer-ci.com/g/mmucklo/email-parse/badges/quality-score.png?b=master)](https://scrutinizer-ci.com/g/mmucklo/email-parse/?branch=master) @@ -11,7 +13,7 @@ email-parse Email\Parse is a multiple (and single) batch email address parser that is reasonably RFC822 / RFC2822 
compliant. -It parses a list of 1 to n email addresses separated by space or comma +It parses a list of 1 to n email addresses separated by space, comma, or semicolon (configurable). Installation: ------------- @@ -27,12 +29,64 @@ Add this line to your composer.json "require" section: Usage: ------ +### Basic Usage + ```php use Email\Parse; $result = Parse::getInstance()->parse("a@aaa.com b@bbb.com"); ``` +### Advanced Usage with ParseOptions + +You can configure separator behavior and other parsing options using `ParseOptions`: + +```php +use Email\Parse; +use Email\ParseOptions; + +// Example 1: Use comma and semicolon as separators (default behavior includes whitespace) +$options = new ParseOptions([], [',', ';']); +$parser = new Parse(null, $options); +$result = $parser->parse("a@aaa.com; b@bbb.com, c@ccc.com"); + +// Example 2: Disable whitespace as separator (only comma and semicolon work) +$options = new ParseOptions([], [',', ';'], false); +$parser = new Parse(null, $options); +$result = $parser->parse("a@aaa.com; b@bbb.com"); // Works - uses semicolon +$result = $parser->parse("a@aaa.com b@bbb.com"); // Won't split - whitespace not a separator + +// Example 3: Names with spaces always work regardless of whitespace separator setting +$options = new ParseOptions([], [',', ';'], false); +$parser = new Parse(null, $options); +$result = $parser->parse("John Doe <john@example.com>, Jane Smith <jane@example.com>"); +// Returns 2 valid emails with names preserved +``` + +#### ParseOptions Constructor + +```php +/** + * @param array $bannedChars Array of characters to ban from email addresses (e.g., ['%', '!']) + * @param array $separators Array of separator characters (default: [',']) + * @param bool $useWhitespaceAsSeparator Whether to treat whitespace/newlines as separators (default: true) + */ +public function __construct( + array $bannedChars = [], + array $separators = [','], + bool $useWhitespaceAsSeparator = true +) +``` + +#### Supported Separators + +- **Comma (`,`)** - Configured via 
`$separators` parameter +- **Semicolon (`;`)** - Configured via `$separators` parameter +- **Whitespace (space, tab, newlines)** - Controlled by `$useWhitespaceAsSeparator` parameter +- **Mixed separators** - All configured separators work together seamlessly + +**Note:** When `useWhitespaceAsSeparator` is `false`, whitespace is still properly cleaned up and names with spaces (like "John Doe") continue to work correctly. + Notes: ====== This should be RFC 2822 compliant, although it will let a few obsolete RFC 822 addresses through such as `test"test"test@xyz.com` (note the quoted string in the middle of the address, which may be obsolete as of RFC 2822). However it wont allow escaping outside of quotes such as `test@test@xyz.com`. This would have to be written as `"test@test"@xyz.com` @@ -51,7 +105,7 @@ how-about-comments(this is a comment!!)@xyz.com ```php /** * function parse($emails, $multiple = true, $encoding = 'UTF-8') - * @param string $emails List of Email addresses separated by comma or space if multiple + * @param string $emails List of Email addresses separated by the configured separators if multiple (comma and whitespace by default; semicolon when configured via ParseOptions) * @param bool $multiple (optional, default: true) Whether to parse for multiple email addresses or not * @param string $encoding (optional, default: 'UTF-8')The encoding if not 'UTF-8' * @return: see below: */ diff --git a/src/Parse.php b/src/Parse.php index 59c44f0..29773fe 100644 --- a/src/Parse.php +++ b/src/Parse.php @@ -34,9 +34,9 @@ class Parse protected ?LoggerInterface $logger = null; /** - * @var ?ParseOptions + * @var ParseOptions */ - protected ?ParseOptions $options; + protected ParseOptions $options; /** * Allow Parse to be instantiated as a singleton. 
@@ -86,9 +86,9 @@ public function setOptions(ParseOptions $options): Parse } /** - * @return ?ParseOptions + * @return ParseOptions */ - public function getOptions(): ?ParseOptions + public function getOptions(): ParseOptions { return $this->options; } @@ -276,12 +276,10 @@ public function parse(string $emails, bool $multiple = true, string $encoding = case self::STATE_SKIP_AHEAD: // Skip ahead is set when a bad email address is encountered // It's supposed to skip to the next delimiter and continue parsing from there - if ($multiple && - (' ' == $curChar || - "\r" == $curChar || - "\n" == $curChar || - "\t" == $curChar || - ',' == $curChar)) { + $isWhitespaceSeparator = $this->options->getUseWhitespaceAsSeparator() && + (' ' == $curChar || "\r" == $curChar || "\n" == $curChar || "\t" == $curChar); + + if ($multiple && ($isWhitespaceSeparator || isset($this->options->getSeparators()[$curChar]))) { $state = self::STATE_END_ADDRESS; } else { $emailAddress['original_address'] .= $curChar; @@ -313,7 +311,7 @@ public function parse(string $emails, bool $multiple = true, string $encoding = // Fall through // no break case self::STATE_ADDRESS: - if (',' != $curChar || !$multiple) { + if (!isset($this->options->getSeparators()[$curChar]) || !$multiple) { $emailAddress['original_address'] .= $curChar; } @@ -323,8 +321,8 @@ public function parse(string $emails, bool $multiple = true, string $encoding = $commentNestLevel = 1; break; - } elseif (',' == $curChar) { - // Handle Comma + } elseif (isset($this->options->getSeparators()[$curChar])) { + // Handle separator (comma, semicolon, etc.) 
if ($multiple && (self::STATE_DOMAIN == $subState || self::STATE_AFTER_DOMAIN == $subState)) { // If we're already in the domain part, this should be the end of the address $state = self::STATE_END_ADDRESS; @@ -333,9 +331,9 @@ public function parse(string $emails, bool $multiple = true, string $encoding = } else { $emailAddress['invalid'] = true; if ($multiple || ($i + 5) >= $len) { - $emailAddress['invalid_reason'] = 'Misplaced Comma or missing "@" symbol'; + $emailAddress['invalid_reason'] = 'Misplaced separator or missing "@" symbol'; } else { - $emailAddress['invalid_reason'] = 'Comma not permitted - only one email address allowed'; + $emailAddress['invalid_reason'] = 'Separator not permitted - only one email address allowed'; } } } elseif (' ' == $curChar || @@ -366,8 +364,10 @@ public function parse(string $emails, bool $multiple = true, string $encoding = $emailAddress['invalid'] = true; $emailAddress['invalid_reason'] = 'Email Address contains whitespace'; } - } elseif (self::STATE_DOMAIN == $subState || self::STATE_AFTER_DOMAIN == $subState) { - // If we're already in the domain part, this should be the end of the whole address + } elseif ($this->options->getUseWhitespaceAsSeparator() && + (self::STATE_DOMAIN == $subState || self::STATE_AFTER_DOMAIN == $subState)) { + // If we're already in the domain part and whitespace is a separator, + // this should be the end of the whole address $state = self::STATE_END_ADDRESS; break; diff --git a/src/ParseOptions.php b/src/ParseOptions.php index 10f1594..a8ed979 100644 --- a/src/ParseOptions.php +++ b/src/ParseOptions.php @@ -4,15 +4,29 @@ class ParseOptions { + /** @var array */ private array $bannedChars = []; + /** @var array */ + private array $separators = []; + private bool $useWhitespaceAsSeparator = true; - public function __construct(array $bannedChars = []) + /** + * @param array $bannedChars + * @param array $separators + * @param bool $useWhitespaceAsSeparator + */ + public function __construct(array 
$bannedChars = [], array $separators = [','], bool $useWhitespaceAsSeparator = true) { if ($bannedChars) { $this->setBannedChars($bannedChars); } + $this->setSeparators($separators); + $this->useWhitespaceAsSeparator = $useWhitespaceAsSeparator; } + /** + * @param array $bannedChars + */ public function setBannedChars(array $bannedChars): void { $this->bannedChars = []; @@ -22,10 +36,39 @@ public function setBannedChars(array $bannedChars): void } /** - * @return array + * @return array */ public function getBannedChars(): array { return $this->bannedChars; } + + /** + * @param array $separators + */ + public function setSeparators(array $separators): void + { + $this->separators = []; + foreach ($separators as $separator) { + $this->separators[$separator] = true; + } + } + + /** + * @return array + */ + public function getSeparators(): array + { + return $this->separators; + } + + public function setUseWhitespaceAsSeparator(bool $useWhitespaceAsSeparator): void + { + $this->useWhitespaceAsSeparator = $useWhitespaceAsSeparator; + } + + public function getUseWhitespaceAsSeparator(): bool + { + return $this->useWhitespaceAsSeparator; + } } diff --git a/tests/ParseTest.php b/tests/ParseTest.php index 549dc60..28ce1a3 100644 --- a/tests/ParseTest.php +++ b/tests/ParseTest.php @@ -6,6 +6,7 @@ require_once __DIR__.'/../src/Parse.php'; use Email\Parse; +use Email\ParseOptions; class ParseTest extends \PHPUnit\Framework\TestCase { @@ -18,7 +19,14 @@ public function testParseEmailAddresses() $multiple = $test['multiple']; $result = $test['result']; - $this->assertSame($result, Parse::getInstance()->parse($emails, $multiple)); + // Check if test specifies use_whitespace_as_separator option + $useWhitespaceAsSeparator = $test['use_whitespace_as_separator'] ?? 
true; + + // Configure Parse to support both comma and semicolon as separators + $options = new ParseOptions(['%', '!'], [',', ';'], $useWhitespaceAsSeparator); + $parser = new Parse(null, $options); + + $this->assertSame($result, $parser->parse($emails, $multiple)); } } } diff --git a/tests/testspec.yml b/tests/testspec.yml index 8f0e779..52d7285 100644 --- a/tests/testspec.yml +++ b/tests/testspec.yml @@ -274,7 +274,7 @@ domain: asdf.ghjkl.com ip: '' invalid: true - invalid_reason: 'Comma not permitted - only one email address allowed' + invalid_reason: 'Separator not permitted - only one email address allowed' - emails: 'tnam e@asdf.g asdfa hjkl.com, tn''''''ame@asdf.ghjkl.com, tname-test1@asdf.ghjkl.com' multiple: true @@ -307,7 +307,7 @@ domain: '' ip: '' invalid: true - invalid_reason: 'Misplaced Comma or missing "@" symbol' + invalid_reason: 'Misplaced separator or missing "@" symbol' - address: 'tn''''''ame@asdf.ghjkl.com' simple_address: 'tn''''''ame@asdf.ghjkl.com' @@ -1048,7 +1048,7 @@ domain: '' ip: 'IPv6:2001:4860:4860::8888' invalid: true - invalid_reason: 'Comma not permitted - only one email address allowed' + invalid_reason: 'Separator not permitted - only one email address allowed' - emails: 'testing@[0.0.0.0]' multiple: false @@ -1501,3 +1501,217 @@ ip: '' invalid: false invalid_reason: null +- + emails: 'test1@example.com test2@example.com' + multiple: true + use_whitespace_as_separator: false + result: + success: false + reason: 'Invalid email address' + email_addresses: + - + address: '' + simple_address: '' + original_address: 'test1@example.com test2@example.com' + name: '' + name_parsed: '' + local_part: test1 + local_part_parsed: test1 + domain_part: example.comtest2 + domain: example.comtest2 + ip: '' + invalid: true + invalid_reason: 'Multiple at ''@'' symbols in email address' +- + emails: 'test1@example.com; test2@example.com' + multiple: true + use_whitespace_as_separator: false + result: + success: true + reason: null + 
email_addresses: + - + address: test1@example.com + simple_address: test1@example.com + original_address: test1@example.com + name: '' + name_parsed: '' + local_part: test1 + local_part_parsed: test1 + domain_part: example.com + domain: example.com + ip: '' + invalid: false + invalid_reason: null + - + address: test2@example.com + simple_address: test2@example.com + original_address: test2@example.com + name: '' + name_parsed: '' + local_part: test2 + local_part_parsed: test2 + domain_part: example.com + domain: example.com + ip: '' + invalid: false + invalid_reason: null +- + emails: 'John Doe , Jane Smith ' + multiple: true + use_whitespace_as_separator: false + result: + success: true + reason: null + email_addresses: + - + address: 'John Doe ' + simple_address: john@example.com + original_address: 'John Doe ' + name: 'John Doe' + name_parsed: 'John Doe' + local_part: john + local_part_parsed: john + domain_part: example.com + domain: example.com + ip: '' + invalid: false + invalid_reason: null + - + address: 'Jane Smith ' + simple_address: jane@example.com + original_address: 'Jane Smith ' + name: 'Jane Smith' + name_parsed: 'Jane Smith' + local_part: jane + local_part_parsed: jane + domain_part: example.com + domain: example.com + ip: '' + invalid: false + invalid_reason: null +- + emails: 'test1@example.com; test2@example.com; test3@example.org' + multiple: true + result: + success: true + reason: null + email_addresses: + - + address: test1@example.com + simple_address: test1@example.com + original_address: test1@example.com + name: '' + name_parsed: '' + local_part: test1 + local_part_parsed: test1 + domain_part: example.com + domain: example.com + ip: '' + invalid: false + invalid_reason: null + - + address: test2@example.com + simple_address: test2@example.com + original_address: test2@example.com + name: '' + name_parsed: '' + local_part: test2 + local_part_parsed: test2 + domain_part: example.com + domain: example.com + ip: '' + invalid: false + 
invalid_reason: null + - + address: test3@example.org + simple_address: test3@example.org + original_address: test3@example.org + name: '' + name_parsed: '' + local_part: test3 + local_part_parsed: test3 + domain_part: example.org + domain: example.org + ip: '' + invalid: false + invalid_reason: null +- + emails: 'John Doe ; Jane Smith ' + multiple: true + result: + success: true + reason: null + email_addresses: + - + address: 'John Doe ' + simple_address: john@example.com + original_address: 'John Doe ' + name: 'John Doe' + name_parsed: 'John Doe' + local_part: john + local_part_parsed: john + domain_part: example.com + domain: example.com + ip: '' + invalid: false + invalid_reason: null + - + address: 'Jane Smith ' + simple_address: jane@example.com + original_address: 'Jane Smith ' + name: 'Jane Smith' + name_parsed: 'Jane Smith' + local_part: jane + local_part_parsed: jane + domain_part: example.com + domain: example.com + ip: '' + invalid: false + invalid_reason: null +- + emails: 'test1@example.com, test2@example.com; test3@example.org' + multiple: true + result: + success: true + reason: null + email_addresses: + - + address: test1@example.com + simple_address: test1@example.com + original_address: test1@example.com + name: '' + name_parsed: '' + local_part: test1 + local_part_parsed: test1 + domain_part: example.com + domain: example.com + ip: '' + invalid: false + invalid_reason: null + - + address: test2@example.com + simple_address: test2@example.com + original_address: test2@example.com + name: '' + name_parsed: '' + local_part: test2 + local_part_parsed: test2 + domain_part: example.com + domain: example.com + ip: '' + invalid: false + invalid_reason: null + - + address: test3@example.org + simple_address: test3@example.org + original_address: test3@example.org + name: '' + name_parsed: '' + local_part: test3 + local_part_parsed: test3 + domain_part: example.org + domain: example.org + ip: '' + invalid: false + invalid_reason: null