|
-- Create a placeholder scalar function when dbo.udf_LCS does not exist yet, so that the
-- ALTER FUNCTION below always has an object to alter (idempotent deployment).
-- The placeholder must itself be a scalar function with a BEGIN ... END body:
-- "RETURNS <scalar type> AS RETURN SELECT ..." is not valid syntax, and ALTER FUNCTION
-- cannot turn a table-valued function into a scalar one.
IF OBJECT_ID('dbo.udf_LCS') IS NULL
    EXECUTE ('CREATE FUNCTION dbo.udf_LCS() RETURNS varchar(max) AS BEGIN RETURN NULL; END;');
GO
| 4 | + |
| 5 | + |
ALTER FUNCTION dbo.udf_LCS
/**
summary: >
  Returns a longest common subsequence (LCS) of two strings. A subsequence keeps
  the original order of the characters but, unlike a substring, does not have to
  occupy consecutive positions. For example, the strings "1234" and "1224533324"
  have an LCS of "1234".
  The score matrix is built row by row (bottom-up dynamic programming) and kept
  as a JSON array of arrays; the result is then read back by walking the matrix
  from its last cell towards the origin.
Author: Phil Factor
Revision: 1.1
Created Date: 2019-04-05
Modified date: 2019-07-08 Konstantin Taranov
Original link: https://www.red-gate.com/simple-talk/blogs/using-json-for-matrices-in-sql-server/
parameters:
  - name: "@xString"
    description: first string; its characters index the columns of the score matrix
  - name: "@yString"
    description: second string; its characters index the rows of the score matrix
notes: >
  - Lengths are measured with Len(), so trailing spaces of either input are ignored.
  - Characters are compared with "=", i.e. under the collation in effect; the
    returned characters are the ones taken from @yString.
  - When several subsequences share the maximum length, one of them is returned.
  - Json_Value is called with a computed path, which presumably needs
    SQL Server 2017 or later (confirm against the target version).
  - Work grows with Len(@xString) * Len(@yString); intended for short strings.
example:
  code:
    SELECT dbo.udf_LCS ('1234', '1224533324');
    SELECT dbo.udf_LCS ('thisisatest', 'testing123testing');
    SELECT dbo.udf_LCS ( 'XMJYAUZ', 'MZJAWXU');
    SELECT dbo.udf_LCS ( 'beginning-middle-ending', 'beginning-diddle-dum-ending');
returns: >
  the longest common subsequence as a string; an empty string when either
  input is NULL or empty, or when the strings share no character
**/
    (@xString varchar(max), @yString varchar(max))
RETURNS varchar(max)
AS
BEGIN
    DECLARE @xLen int = Len(@xString);       -- number of matrix columns (excluding column 0)
    DECLARE @yLen int = Len(@yString);       -- number of matrix rows (excluding row 0)

    -- Nothing can be common with a NULL or empty string.
    IF Coalesce(@xLen, 0) = 0 OR Coalesce(@yLen, 0) = 0
        RETURN '';

    DECLARE @ii int = 1;                     -- row index: position in @yString
    DECLARE @jj int;                         -- column index: position in @xString
    DECLARE @West int;                       -- score of the cell to the left
    DECLARE @NorthWest int;                  -- score of the cell above-left
    DECLARE @North int;                      -- score of the cell above
    DECLARE @Current int;                    -- score of the cell being computed
    DECLARE @yChar varchar(1);               -- character of @yString for the current row
    DECLARE @PreviousRow nvarchar(max);      -- JSON array holding the previous matrix row
    DECLARE @Row nvarchar(max);              -- JSON array being built for the current row
    DECLARE @Matrix nvarchar(max);           -- JSON array of all rows
    DECLARE @PreviousRowScore int;           -- score to the north while backtracking
    DECLARE @PreviousColScore int;           -- score to the west while backtracking
    DECLARE @Subsequence varchar(max) = '';  -- result, assembled back to front

    -- Row 0 is all zeros: @xLen + 1 cells (columns 0..@xLen).
    -- The cast to nvarchar(max) keeps Replicate from truncating at 8000 bytes.
    SELECT @PreviousRow = N'[' + Replicate(Cast(N'0,' AS nvarchar(max)), @xLen) + N'0]';
    SELECT @Matrix = N'[' + @PreviousRow;

    /* Build the matrix bottom-up, one row per character of @yString. An explicit
       inner loop is used so every cell is computed in column order; only numbers
       are written into the JSON, so no input character can invalidate it. */
    WHILE (@ii <= @yLen)
    BEGIN
        SELECT @yChar = Substring(@yString, @ii, 1),
               @jj = 1,
               @West = 0,        -- column 0 of every row is 0
               @NorthWest = 0,   -- column 0 of the previous row is 0
               @Row = N'[0';

        WHILE (@jj <= @xLen)
        BEGIN
            SELECT @North =
                Json_Value(@PreviousRow, 'strict $[' + Cast(@jj AS varchar(10)) + ']');

            -- Matching characters extend the diagonal; otherwise carry the better
            -- of the west and north scores.
            SELECT @Current =
                CASE WHEN Substring(@xString, @jj, 1) = @yChar THEN @NorthWest + 1
                     WHEN @West > @North THEN @West
                     ELSE @North
                END;

            SELECT @Row = @Row + N',' + Cast(@Current AS nvarchar(10)),
                   @NorthWest = @North,   -- this cell's north is the next cell's north-west
                   @West = @Current,
                   @jj = @jj + 1;
        END;

        -- Close the row, remember it for the next iteration and append it to the matrix.
        SELECT @PreviousRow = @Row + N']';
        SELECT @Matrix = @Matrix + N',' + @PreviousRow,
               @ii = @ii + 1;
    END;

    -- Add the closing bracket of the outer array.
    SELECT @Matrix = @Matrix + N']';

    /* Backtrack from the last cell. @Current still holds its score, which is the
       length of the LCS and therefore the number of characters left to collect. */
    SELECT @ii = @yLen, @jj = @xLen;
    WHILE (@Current > 0)
    BEGIN
        SELECT @yChar = Substring(@yString, @ii, 1);
        IF (@yChar = Substring(@xString, @jj, 1))
            -- Same character in both strings: it is part of the LCS; move diagonally.
            SELECT @Subsequence = @yChar + @Subsequence,
                   @ii = @ii - 1,
                   @jj = @jj - 1,
                   @Current = @Current - 1;
        ELSE
        BEGIN
            -- Otherwise read the scores to the north and to the west ...
            SELECT @PreviousRowScore =
                       Json_Value(
                           @Matrix,
                           'strict $[' + Convert(varchar(10), @ii - 1) + ']['
                           + Convert(varchar(10), @jj) + ']'
                       ),
                   @PreviousColScore =
                       Json_Value(
                           @Matrix,
                           'strict $[' + Convert(varchar(10), @ii) + ']['
                           + Convert(varchar(10), @jj - 1) + ']'
                       );
            -- ... and move towards the larger one (north on a tie).
            IF @PreviousRowScore < @PreviousColScore
                SELECT @jj = @jj - 1;
            ELSE
                SELECT @ii = @ii - 1;
        END;
    END;

    RETURN @Subsequence;
END;
GO
0 commit comments