Convert a UUID in base57 with PostgreSQL (like Shortuuid)

I was implementing DBT in one of my project and I needed to keep the same behavior than shortuuid python package.

Shortuuid is basically encode UUID to base57. Based on shortuuid code, I reproduced the algorythm in SQL. Here are the 3 fonctions I used to manage it.

Convert UUID to INT

CREATE OR REPLACE FUNCTION uuid_to_int(uuid UUID)
RETURNS numeric AS $$
DECLARE
    uuid_str text;
    result numeric := 0;
    hex_digit text;
BEGIN
    -- Remove dash
    uuid_str := replace(uuid::text, '-', '');

    -- Convert hex to numeric
    FOR i IN 1..32 LOOP
        hex_digit := substr(uuid_str, i, 1);
        result := result * 16 + (
            CASE
                WHEN hex_digit BETWEEN '0' AND '9' THEN ascii(hex_digit) - ascii('0')
                WHEN hex_digit BETWEEN 'a' AND 'f' THEN ascii(hex_digit) - ascii('a') + 10
                WHEN hex_digit BETWEEN 'A' AND 'F' THEN ascii(hex_digit) - ascii('A') + 10
            END
        );
    END LOOP;

    RETURN result;
END;
$$ LANGUAGE plpgsql IMMUTABLE STRICT;

Safe division

Because PostgreSQL is not exact while dividing huge numbers

CREATE OR REPLACE FUNCTION safe_divmod(n numeric, d numeric)
RETURNS TABLE(quotient numeric, remainder numeric) AS $$
DECLARE
    q numeric;
    r numeric;
BEGIN
    q := trunc(n/d);
    r := n - (q * d);

    IF r < 0 THEN
        r := r + d;
        q := q - 1;
    END IF;

    RETURN QUERY SELECT q, r;
END;
$$ LANGUAGE plpgsql IMMUTABLE STRICT;

Encoding in base 57

CREATE OR REPLACE FUNCTION encode_base57(uuid UUID)
RETURNS text AS $$
DECLARE
    alphabet text := '23456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'; -- From shortuuid code
    result text := '';
    val numeric;
    remainder int;
BEGIN
    val := uuid_to_int(uuid);

    -- Convert to base57
    WHILE val > 0 LOOP
        SELECT * FROM safe_divmod(val, 57) INTO val, remainder;
        result := substr(alphabet, remainder + 1, 1) || result;
    END LOOP;

    -- Padding
    WHILE length(result) < 22 LOOP
        result := substr(alphabet, 1, 1) || result;
    END LOOP;

    RETURN result;
END;
$$ LANGUAGE plpgsql IMMUTABLE STRICT;

Let’s try it !

>>> import shortuuid
>>> import uuid
>>> u = uuid.uuid4()
>>> u
UUID('3b97cb7c-29af-4159-8f3a-f91a52109453')
>>> shortuuid.encode(u)
'CcNeZTyUbVZz6dWrNo5sHn'
qfdmo=# select encode_base57('3b97cb7c-29af-4159-8f3a-f91a52109453');
     encode_base57
------------------------
 CcNeZTyUbVZz6dWrNo5sHn
(1 row)

Yeah !!! We’ve got the same result 🥳

Leave a Reply

Your email address will not be published. Required fields are marked *