doctrine · rinu · Aug 14, 2023 · Aug 14, 2023 · Aug 14, 2023 · Aug 15, 2023
diff --git a/src/Condition.php b/src/Condition.php
@@ -0,0 +1,23 @@
+<?php
+
+declare(strict_types=1);
+
+namespace Doctrine\SqlFormatter;
+
+/**
+ * Conditions that end a block.
+ *
+ * Instances are created and filled in by Token::isBlockStart() and are
+ * matched against later tokens by Token::isBlockEnd().
+ */
+final class Condition
+{
+    /**
+     * Token types that end the block; Token::isBlockEnd() hands them to
+     * Token::isOfType().
+     *
+     * @var int[]
+     */
+    public $types = [];
+
+    /**
+     * Token values that end the block, compared strictly against the value
+     * of the candidate token (for example ")" or "END").
+     *
+     * @var string[]
+     */
+    public $values = [];
+
+    /**
+     * Presumably signals that reaching the end of the input also ends the
+     * block. Token::isBlockStart() sets it for JOIN blocks; the code that
+     * reads it is not visible here - TODO confirm against the formatter.
+     *
+     * @var bool
+     */
+    public $eof = false;
+
+    /**
+     * Presumably requests a newline when the block is closed.
+     * Token::isBlockStart() sets it for "(" blocks; the code that reads it
+     * is not visible here - TODO confirm against the formatter.
+     *
+     * @var bool
+     */
+    public $addNewline = false;
+}
diff --git a/src/SqlFormatter.php b/src/SqlFormatter.php
diff --git a/src/Token.php b/src/Token.php
@@ -5,7 +5,6 @@
 namespace Doctrine\SqlFormatter;
 
 use function in_array;
-use function strpos;
 
 final class Token
 {
@@ -55,15 +54,75 @@ public function isOfType(int ...$types): bool
         return in_array($this->type, $types, true);
     }
 
-    public function hasExtraWhitespace(): bool
+    /**
+     * Builds a new token that carries this token's type, as reported by
+     * type(), together with the given value.
+     *
+     * @param string $value Value for the new token.
+     *
+     * @return self The newly constructed token.
+     */
+    public function withValue(string $value): self
     {
-        return strpos($this->value(), ' ') !== false ||
-            strpos($this->value(), "\n") !== false ||
-            strpos($this->value(), "\t") !== false;
+        return new self($this->type(), $value);
     }
 
-    public function withValue(string $value): self
+    /**
+     * Reports whether this token opens a block and, if it does, what ends it.
+     *
+     * Only the token value is inspected. "(", "CASE WHEN", "ELSE", "CASE" and
+     * the JOIN variants listed below open a block; any other value does not.
+     *
+     * @return Condition|null The end condition for the block opened by this
+     *                        token, or null when the token opens no block.
+     */
+    public function isBlockStart(): ?Condition
     {
-        return new self($this->type(), $value);
+        $joinKeywords = [
+            'LEFT OUTER JOIN',
+            'RIGHT OUTER JOIN',
+            'LEFT JOIN',
+            'RIGHT JOIN',
+            'OUTER JOIN',
+            'INNER JOIN',
+            'CROSS JOIN',
+            'JOIN',
+        ];
+
+        $end = new Condition();
+
+        if ($this->value === '(') {
+            // A parenthesis block is ended by ")" and flags addNewline.
+            $end->values     = [')'];
+            $end->addNewline = true;
+        } elseif (in_array($this->value, ['CASE WHEN', 'ELSE'], true)) {
+            // A CASE branch is ended by the next ELSE or by END.
+            $end->values = ['ELSE', 'END'];
+        } elseif ($this->value === 'CASE') {
+            $end->values = ['END'];
+        } elseif (in_array($this->value, $joinKeywords, true)) {
+            // A JOIN is ended by any JOIN keyword or by a top-level reserved
+            // token; the eof flag is set as well.
+            $end->values = $joinKeywords;
+            $end->types  = [self::TOKEN_TYPE_RESERVED_TOPLEVEL];
+            $end->eof    = true;
+        } else {
+            return null;
+        }
+
+        return $end;
+    }
+
+    /**
+     * Tells whether this token ends a block that has the given end condition.
+     *
+     * The token matches when its type is one of $condition->types or when its
+     * value is strictly equal to one of $condition->values. The eof and
+     * addNewline flags of the condition are not consulted here.
+     *
+     * @param Condition $condition End condition of the block being checked.
+     *
+     * @return bool True when this token satisfies the condition.
+     */
+    public function isBlockEnd(Condition $condition): bool
+    {
+        return $this->isOfType(...$condition->types)
+            || in_array($this->value, $condition->values, true);
+    }
+
+    /**
+     * Tells whether this token wants a space in front of it.
+     *
+     * @return bool False when the value is ".", ",", ";" or ")", or when the
+     *              token is a newline or top-level reserved word or any kind
+     *              of comment; true otherwise.
+     */
+    public function wantsSpaceBefore(): bool
+    {
+        $tightPunctuation = ['.', ',', ';', ')'];
+
+        return ! in_array($this->value, $tightPunctuation, true)
+            && ! $this->isOfType(
+                self::TOKEN_TYPE_RESERVED_NEWLINE,
+                self::TOKEN_TYPE_RESERVED_TOPLEVEL,
+                self::TOKEN_TYPE_COMMENT,
+                self::TOKEN_TYPE_BLOCK_COMMENT
+            );
     }
 }
diff --git a/src/Tokenizer.php b/src/Tokenizer.php
@@ -12,6 +12,7 @@
 use function implode;
 use function preg_match;
 use function preg_quote;
+use function preg_replace;
 use function str_replace;
 use function strlen;
 use function strpos;
@@ -49,7 +50,6 @@ final class Tokenizer
         'BINLOG',
         'BOTH',
         'CASCADE',
-        'CASE',
         'CHANGE',
         'CHANGED',
         'CHARACTER SET',
@@ -92,9 +92,7 @@ final class Tokenizer
         'DUMPFILE',
         'DUPLICATE',
         'DYNAMIC',
-        'ELSE',
         'ENCLOSED',
-        'END',
         'ENGINE',
         'ENGINE_TYPE',
         'ENGINES',
@@ -189,7 +187,6 @@ final class Tokenizer
         'NOW()',
         'NULL',
         'OFFSET',
-        'ON',
         'OPEN',
         'OPTIMIZE',
         'OPTION',
@@ -284,7 +281,6 @@ final class Tokenizer
         'TABLES',
         'TEMPORARY',
         'TERMINATED',
-        'THEN',
         'TIES',
         'TO',
         'TRAILING',
@@ -303,7 +299,6 @@ final class Tokenizer
         'USING',
         'VARIABLES',
         'VIEW',
-        'WHEN',
         'WITH',
         'WORK',
         'WRITE',
@@ -343,6 +338,7 @@ final class Tokenizer
         'RANGE',
         'GROUPS',
         'WINDOW',
+        'ON DUPLICATE KEY UPDATE',
     ];
 
     /** @var string[] */
@@ -353,11 +349,18 @@ final class Tokenizer
         'RIGHT JOIN',
         'OUTER JOIN',
         'INNER JOIN',
+        'CROSS JOIN',
         'JOIN',
         'XOR',
         'OR',
         'AND',
         'EXCLUDE',
+        'ON',
+        'CASE WHEN',
+        'CASE',
+        'WHEN',
+        'ELSE',
+        'END',
     ];
 
     /** @var string[] */
@@ -669,6 +672,7 @@ final class Tokenizer
         'UTC_TIME',
         'UTC_TIMESTAMP',
         'UUID',
+        'VALUES',
         'VAR',
         'VARIANCE',
         'VARP',
@@ -909,7 +913,7 @@ private function createNextToken(string $string, ?Token $previous = null): Token
             ) {
                 return new Token(
                     Token::TOKEN_TYPE_RESERVED_TOPLEVEL,
-                    substr($upper, 0, strlen($matches[1]))
+                    (string) preg_replace('/\s+/', ' ', substr($upper, 0, strlen($matches[1])))
                 );
             }
 
@@ -923,7 +927,7 @@ private function createNextToken(string $string, ?Token $previous = null): Token
             ) {
                 return new Token(
                     Token::TOKEN_TYPE_RESERVED_NEWLINE,
-                    substr($upper, 0, strlen($matches[1]))
+                    (string) preg_replace('/\s+/', ' ', substr($upper, 0, strlen($matches[1])))
                 );
             }
 
@@ -937,7 +941,7 @@ private function createNextToken(string $string, ?Token $previous = null): Token
             ) {
                 return new Token(
                     Token::TOKEN_TYPE_RESERVED,
-                    substr($upper, 0, strlen($matches[1]))
+                    (string) preg_replace('/\s+/', ' ', substr($upper, 0, strlen($matches[1])))
                 );
             }
         }