I was implementing DBT in one of my project and I needed to keep the same behavior than shortuuid python package.
Shortuuid is basically encode UUID to base57. Based on shortuuid code, I reproduced the algorythm in SQL. Here are the 3 fonctions I used to manage it.
Convert UUID to INT
CREATE OR REPLACE FUNCTION uuid_to_int(uuid UUID)
RETURNS numeric AS $$
DECLARE
uuid_str text;
result numeric := 0;
hex_digit text;
BEGIN
-- Remove dash
uuid_str := replace(uuid::text, '-', '');
-- Convert hex to numeric
FOR i IN 1..32 LOOP
hex_digit := substr(uuid_str, i, 1);
result := result * 16 + (
CASE
WHEN hex_digit BETWEEN '0' AND '9' THEN ascii(hex_digit) - ascii('0')
WHEN hex_digit BETWEEN 'a' AND 'f' THEN ascii(hex_digit) - ascii('a') + 10
WHEN hex_digit BETWEEN 'A' AND 'F' THEN ascii(hex_digit) - ascii('A') + 10
END
);
END LOOP;
RETURN result;
END;
$$ LANGUAGE plpgsql IMMUTABLE STRICT;
Safe division
Because PostgreSQL is not exact while dividing huge numbers
CREATE OR REPLACE FUNCTION safe_divmod(n numeric, d numeric)
RETURNS TABLE(quotient numeric, remainder numeric) AS $$
DECLARE
q numeric;
r numeric;
BEGIN
q := trunc(n/d);
r := n - (q * d);
IF r < 0 THEN
r := r + d;
q := q - 1;
END IF;
RETURN QUERY SELECT q, r;
END;
$$ LANGUAGE plpgsql IMMUTABLE STRICT;
Encoding in base 57
CREATE OR REPLACE FUNCTION encode_base57(uuid UUID)
RETURNS text AS $$
DECLARE
alphabet text := '23456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'; -- From shortuuid code
result text := '';
val numeric;
remainder int;
BEGIN
val := uuid_to_int(uuid);
-- Convert to base57
WHILE val > 0 LOOP
SELECT * FROM safe_divmod(val, 57) INTO val, remainder;
result := substr(alphabet, remainder + 1, 1) || result;
END LOOP;
-- Padding
WHILE length(result) < 22 LOOP
result := substr(alphabet, 1, 1) || result;
END LOOP;
RETURN result;
END;
$$ LANGUAGE plpgsql IMMUTABLE STRICT;
Let’s try it !
>>> import shortuuid
>>> import uuid
>>> u = uuid.uuid4()
>>> u
UUID('3b97cb7c-29af-4159-8f3a-f91a52109453')
>>> shortuuid.encode(u)
'CcNeZTyUbVZz6dWrNo5sHn'
qfdmo=# select encode_base57('3b97cb7c-29af-4159-8f3a-f91a52109453');
encode_base57
------------------------
CcNeZTyUbVZz6dWrNo5sHn
(1 row)
Yeah !!! We’ve got the same result 🥳