Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions src/Condition.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
<?php

declare(strict_types=1);

namespace Doctrine\SqlFormatter;

/**
* Conditions that end a block.
*
* Mutable value holder: Token::isBlockStart() creates an instance and fills in
* the fields that apply to the kind of block being opened, and
* Token::isBlockEnd() checks a later token against $types and $values.
*/
final class Condition
{
/**
* Token types that end the block; a token whose type is listed here is
* treated as a block end.
*
* @var int[]
*/
public $types = [];

/**
* Token values that end the block, compared strictly against the
* token's value (e.g. [')'] for a block opened by '(').
*
* @var string[]
*/
public $values = [];

/**
* Set to true for JOIN blocks. NOTE(review): presumably means the block may
* also be ended by the end of the input; the code that reads this flag is
* not visible here, so confirm against SqlFormatter.
*
* @var bool
*/
public $eof = false;

/**
* Set to true for blocks opened by '('. NOTE(review): presumably asks the
* formatter to emit a newline around the block boundary; the code that
* reads this flag is not visible here, so confirm against SqlFormatter.
*
* @var bool
*/
public $addNewline = false;
}
290 changes: 156 additions & 134 deletions src/SqlFormatter.php

Large diffs are not rendered by default.

73 changes: 66 additions & 7 deletions src/Token.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
namespace Doctrine\SqlFormatter;

use function in_array;
use function strpos;

final class Token
{
Expand Down Expand Up @@ -55,15 +54,75 @@ public function isOfType(int ...$types): bool
return in_array($this->type, $types, true);
}

public function hasExtraWhitespace(): bool
public function withValue(string $value): self
{
return strpos($this->value(), ' ') !== false ||
strpos($this->value(), "\n") !== false ||
strpos($this->value(), "\t") !== false;
return new self($this->type(), $value);
}

public function withValue(string $value): self
public function isBlockStart(): ?Condition
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do not think this is fully possible this way. There are ambiguous tokens (and groups of tokens), because we never know the full grammar, nor do we parse it.

That said, we cannot tell whether a token represents a "block start/end" just by looking around it. To implement this and fix #118 (and prevent related issues), I think we first need to parse the tokens into an intermediate tree data structure in a "try consume" fashion.

With this approach the tree of parsed blocks will always be correct (otherwise they will be kept unparsed/as tokens) and the formatter can then be hugely simplified.

{
return new self($this->type(), $value);
$condition = new Condition();

if ($this->value === '(') {
$condition->values = [')'];
$condition->addNewline = true;

return $condition;
}

if ($this->value === 'CASE WHEN' || $this->value === 'ELSE') {
$condition->values = ['ELSE', 'END'];

return $condition;
}

if ($this->value === 'CASE') {
$condition->values = ['END'];

return $condition;
}

$joins = [
'LEFT OUTER JOIN',
'RIGHT OUTER JOIN',
'LEFT JOIN',
'RIGHT JOIN',
'OUTER JOIN',
'INNER JOIN',
'CROSS JOIN',
'JOIN',
];
if (in_array($this->value, $joins, true)) {
$condition->values = $joins;
$condition->types = [self::TOKEN_TYPE_RESERVED_TOPLEVEL];
$condition->eof = true;

return $condition;
}

return null;
}

/**
 * Tells whether this token terminates a block described by $condition.
 *
 * The token ends the block when its type is one of $condition->types or,
 * failing that, when its value is strictly equal to one of
 * $condition->values.
 *
 * @param Condition $condition End conditions of the currently open block.
 *
 * @return bool True when this token matches by type or by value.
 */
public function isBlockEnd(Condition $condition): bool
{
    // The type check runs first; the value lookup only happens when it fails.
    return $this->isOfType(...$condition->types)
        || in_array($this->value, $condition->values, true);
}

/**
 * Tells whether a space should be placed in front of this token.
 *
 * @return bool False for the values '.', ',', ';' and ')', and for tokens of
 *              type RESERVED_NEWLINE, RESERVED_TOPLEVEL, COMMENT or
 *              BLOCK_COMMENT; true for every other token.
 */
public function wantsSpaceBefore(): bool
{
    $isTightPunctuation = in_array($this->value, ['.', ',', ';', ')'], true);
    if ($isTightPunctuation) {
        return false;
    }

    $isSpaceSuppressingType = $this->isOfType(
        self::TOKEN_TYPE_RESERVED_NEWLINE,
        self::TOKEN_TYPE_RESERVED_TOPLEVEL,
        self::TOKEN_TYPE_COMMENT,
        self::TOKEN_TYPE_BLOCK_COMMENT
    );

    return ! $isSpaceSuppressingType;
}
}
22 changes: 13 additions & 9 deletions src/Tokenizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
use function implode;
use function preg_match;
use function preg_quote;
use function preg_replace;
use function str_replace;
use function strlen;
use function strpos;
Expand Down Expand Up @@ -49,7 +50,6 @@ final class Tokenizer
'BINLOG',
'BOTH',
'CASCADE',
'CASE',
'CHANGE',
'CHANGED',
'CHARACTER SET',
Expand Down Expand Up @@ -92,9 +92,7 @@ final class Tokenizer
'DUMPFILE',
'DUPLICATE',
'DYNAMIC',
'ELSE',
'ENCLOSED',
'END',
'ENGINE',
'ENGINE_TYPE',
'ENGINES',
Expand Down Expand Up @@ -189,7 +187,6 @@ final class Tokenizer
'NOW()',
'NULL',
'OFFSET',
'ON',
'OPEN',
'OPTIMIZE',
'OPTION',
Expand Down Expand Up @@ -284,7 +281,6 @@ final class Tokenizer
'TABLES',
'TEMPORARY',
'TERMINATED',
'THEN',
'TIES',
'TO',
'TRAILING',
Expand All @@ -303,7 +299,6 @@ final class Tokenizer
'USING',
'VARIABLES',
'VIEW',
'WHEN',
'WITH',
'WORK',
'WRITE',
Expand Down Expand Up @@ -343,6 +338,7 @@ final class Tokenizer
'RANGE',
'GROUPS',
'WINDOW',
'ON DUPLICATE KEY UPDATE',
];

/** @var string[] */
Expand All @@ -353,11 +349,18 @@ final class Tokenizer
'RIGHT JOIN',
'OUTER JOIN',
'INNER JOIN',
'CROSS JOIN',
'JOIN',
'XOR',
'OR',
'AND',
'EXCLUDE',
'ON',
'CASE WHEN',
'CASE',
'WHEN',
'ELSE',
'END',
];

/** @var string[] */
Expand Down Expand Up @@ -669,6 +672,7 @@ final class Tokenizer
'UTC_TIME',
'UTC_TIMESTAMP',
'UUID',
'VALUES',
'VAR',
'VARIANCE',
'VARP',
Expand Down Expand Up @@ -909,7 +913,7 @@ private function createNextToken(string $string, ?Token $previous = null): Token
) {
return new Token(
Token::TOKEN_TYPE_RESERVED_TOPLEVEL,
substr($upper, 0, strlen($matches[1]))
(string) preg_replace('/\s+/', ' ', substr($upper, 0, strlen($matches[1])))
);
}

Expand All @@ -923,7 +927,7 @@ private function createNextToken(string $string, ?Token $previous = null): Token
) {
return new Token(
Token::TOKEN_TYPE_RESERVED_NEWLINE,
substr($upper, 0, strlen($matches[1]))
(string) preg_replace('/\s+/', ' ', substr($upper, 0, strlen($matches[1])))
);
}

Expand All @@ -937,7 +941,7 @@ private function createNextToken(string $string, ?Token $previous = null): Token
) {
return new Token(
Token::TOKEN_TYPE_RESERVED,
substr($upper, 0, strlen($matches[1]))
(string) preg_replace('/\s+/', ' ', substr($upper, 0, strlen($matches[1])))
);
}
}
Expand Down
Loading