CREATE FUNCTION dbo.DecodeUTF8String (@value varchar(max))
RETURNS nvarchar(max)
AS
BEGIN
-- Check
if
the string is ASCII
or
null,
as
no conversion is needed.
IF (@value IS NULL
OR @value NOT LIKE
'%[^ -~]%'
COLLATE Latin1_General_BIN
)
RETURN @value;
DECLARE @result nvarchar(max);
-- Split the UTF-8 string into octets
and
calculate their codepoints.
WITH octets AS
(
SELECT position, highorderones, partialcodepoint
FROM numbers a
CROSS APPLY (SELECT octet = ASCII(SUBSTRING(@value, position, 1))) b
CROSS APPLY (SELECT highorderones = 8 -
FLOOR
(LOG( ~CONVERT(tinyint, octet) * 2 + 1)/LOG(2))) c
CROSS APPLY (SELECT databits = 7 - highorderones) d
CROSS APPLY (SELECT partialcodepoint = octet % POWER(2, databits)) e
),
codepoints AS
(
SELECT position, codepoint
FROM
(
SELECT position, highorderones, partialcodepoint
FROM octets
WHERE highorderones <> 1
) lead
CROSS APPLY (SELECT sequencelength = CASE WHEN highorderones in (1,2,3,4) THEN highorderones ELSE 1
END
) b
CROSS APPLY (SELECT endposition = position + sequencelength - 1) c
CROSS APPLY
(
SELECT codepoint = SUM(POWER(2, shiftleft) * partialcodepoint)
FROM octets
CROSS APPLY (SELECT shiftleft = 6 * (endposition - position)) b
WHERE position BETWEEN lead.position AND endposition
) d
)
-- Concatenate the codepoints into a Unicode string.
SELECT @result = CONVERT(xml,
(
SELECT NCHAR(codepoint)
FROM codepoints
ORDER BY position
FOR XML PATH(
''
)
)).value(
'.'
,
'nvarchar(max)'
);
RETURN @result;
END
GO