~sircmpwn/sr.ht-dev

This thread contains a patchset. You're looking at the original emails, but you may wish to use the patch review UI. Review patch
5 3

[PATCH todo.sr.ht v2] todosrht: Use canonical user IDs

Details
Message ID
<20220714164737.23517-1-me@adnano.co>
DKIM signature
pass
Download raw message
Patch: +162 -0
Update user IDs across todo.sr.ht to match those of meta.sr.ht.
---
 .../4c70fa9bc46f_use_canonical_user_id.py     | 108 ++++++++++++++++++
 .../b5f503ac3ae9_add_user_remote_id.py        |  54 +++++++++
 2 files changed, 162 insertions(+)
 create mode 100644 todosrht/alembic/versions/4c70fa9bc46f_use_canonical_user_id.py
 create mode 100644 todosrht/alembic/versions/b5f503ac3ae9_add_user_remote_id.py

diff --git a/todosrht/alembic/versions/4c70fa9bc46f_use_canonical_user_id.py b/todosrht/alembic/versions/4c70fa9bc46f_use_canonical_user_id.py
new file mode 100644
index 0000000..d5c0cc6
--- /dev/null
+++ b/todosrht/alembic/versions/4c70fa9bc46f_use_canonical_user_id.py
@@ -0,0 +1,108 @@
"""Use canonical user ID

Revision ID: 4c70fa9bc46f
Revises: b5f503ac3ae9
Create Date: 2022-06-16 11:38:50.325723

"""

# revision identifiers, used by Alembic.
revision = '4c70fa9bc46f'
down_revision = 'b5f503ac3ae9'

from alembic import op
import sqlalchemy as sa


# These tables all have a column referencing "user"(id)
tables = [
    ("celery_webhook_subscription", "user_id"),
    ("event_notification", "user_id"),
    ("gql_ticket_wh_sub", "user_id"),
    ("gql_tracker_wh_sub", "user_id"),
    ("gql_user_wh_sub", "user_id"),
    ("oauthtoken", "user_id"),
    ("participant", "user_id"),
    ("ticket_assignee", "assignee_id"),
    ("ticket_assignee", "assigner_id"),
    ("ticket_label", "user_id"),
    ("ticket_webhook_subscription", "user_id"),
    ("tracker", "owner_id"),
    ("tracker_webhook_subscription", "user_id"),
    ("user_access", "user_id"),
    ("user_webhook_subscription", "user_id"),
    ("webhook_subscription", "user_id"),
]

def upgrade():
    # Drop unique constraints
    op.execute("""
    ALTER TABLE participant DROP CONSTRAINT participant_user_id_key;
    ALTER TABLE tracker DROP CONSTRAINT tracker_owner_id_name_unique;
    """)

    # Drop foreign key constraints and update user IDs
    for (table, col) in tables:
        op.execute(f"""
        ALTER TABLE {table} DROP CONSTRAINT {table}_{col}_fkey;
        UPDATE {table} t SET {col} = u.remote_id FROM "user" u WHERE u.id = t.{col};
        """)

    # Update primary key
    op.execute("""
    ALTER TABLE "user" DROP CONSTRAINT user_pkey;
    ALTER TABLE "user" DROP CONSTRAINT user_remote_id_key;
    ALTER TABLE "user" RENAME COLUMN id TO old_id;
    ALTER TABLE "user" RENAME COLUMN remote_id TO id;
    ALTER TABLE "user" ADD PRIMARY KEY (id);
    ALTER TABLE "user" ADD UNIQUE (old_id);
    """)

    # Add foreign key constraints
    for (table, col) in tables:
        op.execute(f"""
        ALTER TABLE {table} ADD CONSTRAINT {table}_{col}_fkey FOREIGN KEY ({col}) REFERENCES "user"(id) ON DELETE CASCADE;
        """)

    # Add unique constraints
    op.execute("""
    ALTER TABLE participant ADD CONSTRAINT participant_user_id_key UNIQUE (user_id);
    ALTER TABLE tracker ADD CONSTRAINT tracker_owner_id_name_unique UNIQUE (owner_id, name);
    """)


def downgrade():
    # Drop unique constraints
    op.execute("""
    ALTER TABLE participant DROP CONSTRAINT participant_user_id_key;
    ALTER TABLE tracker DROP CONSTRAINT tracker_owner_id_name_unique;
    """)

    # Drop foreign key constraints and update user IDs
    for (table, col) in tables:
        op.execute(f"""
        ALTER TABLE {table} DROP CONSTRAINT {table}_{col}_fkey;
        UPDATE {table} t SET {col} = u.old_id FROM "user" u WHERE u.id = t.{col};
        """)

    # Update primary key
    op.execute("""
    ALTER TABLE "user" DROP CONSTRAINT user_pkey;
    ALTER TABLE "user" DROP CONSTRAINT user_old_id_key;
    ALTER TABLE "user" RENAME COLUMN id TO remote_id;
    ALTER TABLE "user" RENAME COLUMN old_id TO id;
    ALTER TABLE "user" ADD PRIMARY KEY (id);
    ALTER TABLE "user" ADD UNIQUE (remote_id);
    """)

    # Add foreign key constraints
    for (table, col) in tables:
        op.execute(f"""
        ALTER TABLE {table} ADD CONSTRAINT {table}_{col}_fkey FOREIGN KEY ({col}) REFERENCES "user"(id) ON DELETE CASCADE;
        """)

    # Add unique constraints
    op.execute("""
    ALTER TABLE participant ADD CONSTRAINT participant_user_id_key UNIQUE (user_id);
    ALTER TABLE tracker ADD CONSTRAINT tracker_owner_id_name_unique UNIQUE (owner_id, name);
    """)
diff --git a/todosrht/alembic/versions/b5f503ac3ae9_add_user_remote_id.py b/todosrht/alembic/versions/b5f503ac3ae9_add_user_remote_id.py
new file mode 100644
index 0000000..a93d19d
--- /dev/null
+++ b/todosrht/alembic/versions/b5f503ac3ae9_add_user_remote_id.py
@@ -0,0 +1,54 @@
"""Add user.remote_id

Revision ID: b5f503ac3ae9
Revises: e1e2e901be0c
Create Date: 2022-06-16 10:11:00.727683

"""

# revision identifiers, used by Alembic.
revision = 'b5f503ac3ae9'
down_revision = 'e1e2e901be0c'

from alembic import op
import sqlalchemy as sa
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import scoped_session, sessionmaker
from srht.crypto import internal_anon
from srht.database import db
from srht.graphql import exec_gql

Base = declarative_base()

class User(Base):
    __tablename__ = "user"
    id = sa.Column(sa.Integer, primary_key=True)
    username = sa.Column(sa.Unicode(256), index=True, unique=True)
    remote_id = sa.Column(sa.Integer, unique=True)

def upgrade():
    engine = op.get_bind()
    session = scoped_session(sessionmaker(
        autocommit=False,
        autoflush=False,
        bind=engine))
    Base.query = session.query_property()

    op.execute("""ALTER TABLE "user" ADD COLUMN remote_id integer UNIQUE""")

    for user in User.query:
        user.remote_id = fetch_user_id(user.username)
        print(f"~{user.username} id: {user.id} -> {user.remote_id}")
    session.commit()

    op.execute("""ALTER TABLE "user" ALTER COLUMN remote_id SET NOT NULL""")

def downgrade():
    op.drop_column("user", "remote_id")

def fetch_user_id(username):
    resp = exec_gql("meta.sr.ht",
            "query($username: String!) { user(username: $username) { id } }",
            user=internal_anon,
            username=username)
    return resp["user"]["id"]
-- 
2.37.1

[todo.sr.ht/patches] build success

builds.sr.ht <builds@sr.ht>
Details
Message ID
<CLFJD88PC1H7.1F7P5YBW8RZWF@cirno>
In-Reply-To
<20220714164737.23517-1-me@adnano.co> (view parent)
DKIM signature
missing
Download raw message
todo.sr.ht/patches: SUCCESS in 8m47s

[todosrht: Use canonical user IDs][0] v2 from [Adnan Maolood][1]

[0]: https://lists.sr.ht/~sircmpwn/sr.ht-dev/patches/33879
[1]: me@adnano.co

✓ #801877 SUCCESS todo.sr.ht/patches/archlinux.yml https://builds.sr.ht/~sircmpwn/job/801877
✓ #801876 SUCCESS todo.sr.ht/patches/alpine.yml    https://builds.sr.ht/~sircmpwn/job/801876
✓ #801878 SUCCESS todo.sr.ht/patches/debian.yml    https://builds.sr.ht/~sircmpwn/job/801878
Details
Message ID
<CLR7YMN15N6P.198DQCQLZ0R1G@taiga>
In-Reply-To
<20220714164737.23517-1-me@adnano.co> (view parent)
DKIM signature
pass
Download raw message
Full review will take a while, but I do have one initial comment:

On Thu Jul 14, 2022 at 6:47 PM CEST, Adnan Maolood wrote:
> +def fetch_user_id(username):
> +    resp = exec_gql("meta.sr.ht",
> +            "query($username: String!) { user(username: $username) { id } }",
> +            user=internal_anon,
> +            username=username)
> +    return resp["user"]["id"]

We should make some attempt to gracefully deal with any errors that
could occur here.
Details
Message ID
<CLR7YXSBOF38.239NLT35F5UB3@taiga>
In-Reply-To
<20220714164737.23517-1-me@adnano.co> (view parent)
DKIM signature
pass
Download raw message
On Thu Jul 14, 2022 at 6:47 PM CEST, Adnan Maolood wrote:
> +def upgrade():
> +    engine = op.get_bind()
> +    session = scoped_session(sessionmaker(
> +        autocommit=False,
> +        autoflush=False,
> +        bind=engine))
> +    Base.query = session.query_property()
> +
> +    op.execute("""ALTER TABLE "user" ADD COLUMN remote_id integer UNIQUE""")
> +
> +    for user in User.query:
> +        user.remote_id = fetch_user_id(user.username)
> +        print(f"~{user.username} id: {user.id} -> {user.remote_id}")
> +    session.commit()

Some kind of progress indicator would be helpful
Details
Message ID
<CLR8HQ5JBVJN.LT5DJUGB8IJD@taiga>
In-Reply-To
<20220714164737.23517-1-me@adnano.co> (view parent)
DKIM signature
pass
Download raw message
Some issues:

celery_webhook_subscription and webhook_subscription don't exist

Also, I ran into an issue. I started with a fresh todo.sr.ht database
initialized with a schema dumped from production[0], logged in as user A
and created a tracker, added it to a hub project, and created a ticket.
Then I ran the migrations and everything works. However, when I logged
in as user B (who did not previously exist in the todo database), it
blew up:

sqlalchemy.exc.IntegrityError: (psycopg2.errors.UniqueViolation) duplicate key value violates unique constraint "user_old_id_key"
DETAIL:  Key (old_id)=(1) already exists.

[SQL: 
            INSERT INTO "user" (
                created, updated, id, username, email, user_type, url, location,
                bio, suspension_notice
            ) VALUES (
                NOW() at time zone 'utc',
                NOW() at time zone 'utc',
                %(id)s, %(name)s, %(email)s, %(user_type)s, %(url)s, %(location)s, %(bio)s, %(suspension_notice)s
            )
            ON CONFLICT (id)
            DO UPDATE SET
                updated = NOW() at time zone 'utc',
                username = %(name)s,
                email = %(email)s,
                user_type = %(user_type)s,
                url = %(url)s,
                location = %(location)s,
                bio = %(bio)s,
                suspension_notice = %(suspension_notice)s
            RETURNING id, username, email, user_type, url, location, bio, suspension_notice;
        ]
[parameters: {'id': 5, 'name': 'jim', 'email': 'jim@cmpwn.com', 'user_type': 'active_non_paying', 'url': None, 'location': None, 'bio': None, 'suspension_notice': None}]
(Background on this error at: https://sqlalche.me/e/14/gkpj)

[0]: https://paste.sr.ht/~sircmpwn/34b767daed7c2cebf656c7fb71cbf2c841ba570b

I will have more work to do to review & validate these patches, but
let's start by addressing these issues before we move on.
Details
Message ID
<CLRTE9DSJ884.3MBBZQLNW2FFM@framework>
In-Reply-To
<CLR8HQ5JBVJN.LT5DJUGB8IJD@taiga> (view parent)
DKIM signature
pass
Download raw message
On Thu Jul 28, 2022 at 6:57 AM EDT, Drew DeVault wrote:
> Some issues:
>
> celery_webhook_subscription and webhook_subscription don't exist

Odd. These tables are created by todosrht-initdb.

> Also, I ran into an issue. I started with a fresh todo.sr.ht database
> initialized with a schema dumped from production[0], logged in as user A
> and created a tracker, added it to a hub project, and created a ticket.
> Then I ran the migrations and everything works. However, when I logged
> in as user B (who did not previously exist in the todo database), it
> blew up:
>
> sqlalchemy.exc.IntegrityError: (psycopg2.errors.UniqueViolation) duplicate key value violates unique constraint "user_old_id_key"
> DETAIL:  Key (old_id)=(1) already exists.
>
> [SQL: 
>             INSERT INTO "user" (
>                 created, updated, id, username, email, user_type, url, location,
>                 bio, suspension_notice
>             ) VALUES (
>                 NOW() at time zone 'utc',
>                 NOW() at time zone 'utc',
>                 %(id)s, %(name)s, %(email)s, %(user_type)s, %(url)s, %(location)s, %(bio)s, %(suspension_notice)s
>             )
>             ON CONFLICT (id)
>             DO UPDATE SET
>                 updated = NOW() at time zone 'utc',
>                 username = %(name)s,
>                 email = %(email)s,
>                 user_type = %(user_type)s,
>                 url = %(url)s,
>                 location = %(location)s,
>                 bio = %(bio)s,
>                 suspension_notice = %(suspension_notice)s
>             RETURNING id, username, email, user_type, url, location, bio, suspension_notice;
>         ]
> [parameters: {'id': 5, 'name': 'jim', 'email': 'jim@cmpwn.com', 'user_type': 'active_non_paying', 'url': None, 'location': None, 'bio': None, 'suspension_notice': None}]
> (Background on this error at: https://sqlalche.me/e/14/gkpj)

I can't reproduce this issue. Can you confirm that user_id_seq is set to
the last value of user.old_id?
Reply to thread Export thread (mbox)