La versión 1 es como la más corta en terminos de caracteres.
La versión 2 es como una versión un poco diferente a la 1, tiene un argumento más que es un número entero, y lo que hace según mis pruebas es que si supera el número de diferencias de caracteres me da como resultado un -1.
Y la versión 3, es una versión que creo que toca modificarla alguito, porque me esta sacando error.
Entonces doy las gracias a:
Arnold Fribble http://www.sqlteam.com/forums/topic.asp?TOPIC_ID=51540
http://www.sqlteam.com/forums/topic.asp?TOPIC_ID=66781
Y a Joseph http://www.merriampark.com/ldtsql.htm
Los códigos son:
VERSIÓN 1
CREATE FUNCTION edit_distance(@s1 nvarchar(3999), @s2 nvarchar(3999))
RETURNS int
AS
BEGIN
DECLARE @s1_len int, @s2_len int, @i int, @j int, @s1_char nchar, @c int, @c_temp int,
@cv0 varbinary(8000), @cv1 varbinary(8000)
SELECT @s1_len = LEN(@s1), @s2_len = LEN(@s2), @cv1 = 0x0000, @j = 1, @i = 1, @c = 0
WHILE @j <= @s2_len
SELECT @cv1 = @cv1 + CAST(@j AS binary(2)), @j = @j + 1
WHILE @i <= @s1_len
BEGIN
SELECT @s1_char = SUBSTRING(@s1, @i, 1), @c = @i, @cv0 = CAST(@i AS binary(2)), @j = 1
WHILE @j <= @s2_len
BEGIN
SET @c = @c + 1
SET @c_temp = CAST(SUBSTRING(@cv1, @j+@j-1, 2) AS int) +
CASE WHEN @s1_char = SUBSTRING(@s2, @j, 1) THEN 0 ELSE 1 END
IF @c > @c_temp SET @c = @c_temp
SET @c_temp = CAST(SUBSTRING(@cv1, @j+@j+1, 2) AS int)+1
IF @c > @c_temp SET @c = @c_temp
SELECT @cv0 = @cv0 + CAST(@c AS binary(2)), @j = @j + 1
END
SELECT @cv1 = @cv0, @i = @i + 1
END
RETURN @c
END
VERSIÓN 2
SET QUOTED_IDENTIFIER ON
GO
SET ANSI_NULLS ON
GO
CREATE FUNCTION edit_distance_within(@s nvarchar(4000), @t nvarchar(4000), @d int)
RETURNS int
AS
BEGIN
DECLARE @sl int, @tl int, @i int, @j int, @sc nchar, @c int, @c1 int,
@cv0 nvarchar(4000), @cv1 nvarchar(4000), @cmin int
SELECT @sl = LEN(@s), @tl = LEN(@t), @cv1 = '', @j = 1, @i = 1, @c = 0
WHILE @j <= @tl
SELECT @cv1 = @cv1 + NCHAR(@j), @j = @j + 1
WHILE @i <= @sl
BEGIN
SELECT @sc = SUBSTRING(@s, @i, 1), @c1 = @i, @c = @i, @cv0 = '', @j = 1, @cmin = 4000
WHILE @j <= @tl
BEGIN
SET @c = @c + 1
SET @c1 = @c1 - CASE WHEN @sc = SUBSTRING(@t, @j, 1) THEN 1 ELSE 0 END
IF @c > @c1 SET @c = @c1
SET @c1 = UNICODE(SUBSTRING(@cv1, @j, 1)) + 1
IF @c > @c1 SET @c = @c1
IF @c < @cmin SET @cmin = @c
SELECT @cv0 = @cv0 + NCHAR(@c), @j = @j + 1
END
IF @cmin > @d BREAK
SELECT @cv1 = @cv0, @i = @i + 1
END
RETURN CASE WHEN @cmin <= @d AND @c <= @d THEN @c ELSE -1 END
END
GO
VERSIÓN 3
CREATE function LEVENSHTEIN( @s varchar(50), @t varchar(50) ) --Returns the Levenshtein Distance between strings s1 and s2. --Original developer: Michael Gilleland http://www.merriampark.com/ld.htm --Translated to TSQL by Joseph Gama returns varchar(50) as BEGIN DECLARE @d varchar(100), @LD int, @m int, @n int, @i int, @j int, @s_i char(1), @t_j char(1),@cost int --Step 1 SET @n=LEN(@s) SET @m=LEN(@t) SET @d=replicate(CHAR(0),100) If @n = 0 BEGIN SET @LD = @m GOTO done END If @m = 0 BEGIN SET @LD = @n GOTO done END --Step 2 SET @i=0 WHILE @i<=@n BEGIN SET @d=STUFF(@d,@i+1,1,CHAR(@i))--d(i, 0) = i SET @i=@i+1 END SET @i=0 WHILE @i<=@m BEGIN SET @d=STUFF(@d,@i*(@n+1)+1,1,CHAR(@i))--d(0, j) = j SET @i=@i+1 END --goto done --Step 3 SET @i=1 WHILE @i<=@n BEGIN SET @s_i=(substring(@s,@i,1)) --Step 4 SET @j=1 WHILE @j<=@m BEGIN SET @t_j=(substring(@t,@j,1)) --Step 5 If @s_i = @t_j SET @cost=0 ELSE SET @cost=1 --Step 6 SET @d=STUFF(@d,@j*(@n+1)+@i+1,1,CHAR(dbo.MIN3( ASCII(substring(@d,@j*(@n+1)+@i-1+1,1))+1, ASCII(substring(@d,(@j-1)*(@n+1)+@i+1,1))+1, ASCII(substring(@d,(@j-1)*(@n+1)+@i-1+1,1))+@cost) )) SET @j=@j+1 END SET @i=@i+1 END --Step 7 SET @LD = ASCII(substring(@d,@n*(@m+1)+@m+1,1)) done: --RETURN @LD --I kept this code that can be used to display the matrix with all calculated values --From Query Analyser it provides a nice way to check the algorithm in action -- RETURN @LD --declare @z varchar(255) --set @z='' --SET @i=0 --WHILE @i<=@n -- BEGIN -- SET @j=0 -- WHILE @j<=@m -- BEGIN -- set @z=@z+CONVERT(char(3),ASCII(substring(@d,@i*(@m+1 )+@j+1 ,1))) -- SET @j=@j+1 -- END -- SET @i=@i+1 -- END --print dbo.wrap(@z,3*(@n+1)) END
Sean felices! :) Y sientanse libres de opinar ;)
No hay comentarios:
Publicar un comentario