~emersion/public-inbox

This thread contains a patchset. You're looking at the original emails, but you may wish to use the patch review UI. Review patch
9 3

[PATCH gamja 1/2] fetchHistoryBetween: Use the 'before' argument to stop instead of inferring from the limit.

Valentin Lorentz
Details
Message ID
<20211023093721.1412757-1-progval+git@progval.net>
DKIM signature
pass
Download raw message
Patch: +5 -5
---
 lib/client.js | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/lib/client.js b/lib/client.js
index c80da50..fe0316a 100644
--- a/lib/client.js
+++ b/lib/client.js
@@ -750,12 +750,12 @@ export default class Client extends EventTarget {
			if (limit <= 0) {
				throw new Error("Cannot fetch all chat history: too many messages");
			}
			if (messages.length == max) {
				// There are still more messages to fetch
				after.time = messages[messages.length - 1].tags.time;
				return this.fetchHistoryBetween(target, after, before, limit);
			if (messages[0].tags.time <= before) {
				return null;
			}
			return null;
			// There are still more messages to fetch
			after.time = messages[messages.length - 1].tags.time;
			return this.fetchHistoryBetween(target, after, before, limit);
		});
	}

-- 
2.20.1

[PATCH gamja 2/2] Add support for msgid-based chathistory

Valentin Lorentz
Details
Message ID
<20211023093721.1412757-2-progval+git@progval.net>
In-Reply-To
<20211023093721.1412757-1-progval+git@progval.net> (view parent)
DKIM signature
pass
Download raw message
Patch: +46 -14
In addition to the default timestamp-based.
---
 components/app.js | 27 +++++++++++++++++++--------
 lib/client.js     | 33 +++++++++++++++++++++++++++------
 2 files changed, 46 insertions(+), 14 deletions(-)

diff --git a/components/app.js b/components/app.js
index 468157c..8e74b60 100644
--- a/components/app.js
+++ b/components/app.js
@@ -375,7 +375,7 @@ export default class App extends Component {
		// TODO: this doesn't trigger a redraw
		this.receipts.set(target, {
			...this.receipts.get(target),
			[type]: { time: msg.tags.time },
			[type]: { time: msg.tags.time, msgid: msg.tags.msgid },
		});
		this.saveReceipts();
	}
@@ -387,7 +387,7 @@ export default class App extends Component {
				return;
			}
			let delivery = receipts[type];
			if (!delivery || !delivery.time) {
			if (!delivery || !delivery.msgid || !delivery.time) {
				return;
			}
			if (!last || delivery.time > last.time) {
@@ -594,12 +594,18 @@ export default class App extends Component {
		switch (msg.command) {
		case irc.RPL_WELCOME:
			let lastReceipt = this.latestReceipt(ReceiptType.DELIVERED);
			if (lastReceipt && lastReceipt.time && client.enabledCaps["draft/chathistory"] && (!client.enabledCaps["soju.im/bouncer-networks"] || client.params.bouncerNetwork)) {
			if (lastReceipt && (lastReceipt.msgid || lastReceipt.time) && client.enabledCaps["draft/chathistory"] && (!client.enabledCaps["soju.im/bouncer-networks"] || client.params.bouncerNetwork)) {
				let now = irc.formatDate(new Date());
				client.fetchHistoryTargets(now, lastReceipt.time).then((targets) => {
				client.fetchHistoryTargets(now, lastReceipt).then((targets) => {
					targets.forEach((target) => {
						let from = lastReceipt;
						let to = { time: msg.tags.time || irc.formatDate(new Date()) };
						let to;
						if ("msgid" in msg.tags) {
							to = { msgid: msg.tags.msgid };
						}
						else {
							to = { time: msg.tags.time || irc.formatDate(new Date()) };
						}
						this.fetchBacklog(client, target.name, from, to);
					});
				});
@@ -1210,11 +1216,16 @@ export default class App extends Component {
			return;
		}

		let before;
		let before = {};
		if (buf.messages.length > 0) {
			before = buf.messages[0].tags["time"];
			if (buf.messages[0].tags.msgid) {
				before.msgid = buf.messages[0].tags.msgid;
			}
			if (buf.messages[0].tags.time) {
				before.time = buf.messages[0].tags.time;
			}
		} else {
			before = irc.formatDate(new Date());
			before.time = irc.formatDate(new Date());
		}

		// Avoids sending multiple CHATHISTORY commands in parallel
diff --git a/lib/client.js b/lib/client.js
index fe0316a..b98d75e 100644
--- a/lib/client.js
+++ b/lib/client.js
@@ -732,25 +732,46 @@ export default class Client extends EventTarget {
		return 100;
	}

	/* Fetch one page of history before the given date. */
	formatHistoryAnchor(anchor) {
		if (anchor.msgid) {
			return "msgid=" + anchor.msgid
		}
		else if (anchor.time) {
			return "timestamp=" + anchor.time
		}
		else {
			throw new Error("Unexpected history anchor: " + anchor);
		}
	}

	/* Fetch one page of history before the given anchor.
	 * 'before' must be an object with either a 'msgid' or 'timestamp' attribute. */
	fetchHistoryBefore(target, before, limit) {
		let max = Math.min(limit, this.chatHistoryPageSize());
		let params = ["BEFORE", target, "timestamp=" + before, max];
		let params = ["BEFORE", target, this.formatHistoryAnchor(before), max];
		return this.roundtripChatHistory(params).then((messages) => {
			return { more: messages.length >= max };
		});
	}

	/* Fetch history in ascending order. */
	/* Fetch history in ascending order between two anchors.
	 * 'before' and 'after' must an objects with either a 'msgid' or 'timestamp' attribute. */
	fetchHistoryBetween(target, after, before, limit) {
		let max = Math.min(limit, this.chatHistoryPageSize());
		let params = ["AFTER", target, "timestamp=" + after.time, max];
		let params = ["AFTER", target, this.formatHistoryAnchor(after), max];
		return this.roundtripChatHistory(params).then((messages) => {
			limit -= messages.length;
			if (limit <= 0) {
				throw new Error("Cannot fetch all chat history: too many messages");
			}
			if (messages[0].tags.time <= before) {
			for (message in messages) {
				if (before.msgid) {
					if (message.msgid == before.msgid) {
						return null;
					}
				}
			}
			if (before.time && messages[0].tags.time <= before.time) {
				return null;
			}
			// There are still more messages to fetch
@@ -762,7 +783,7 @@ export default class Client extends EventTarget {
	fetchHistoryTargets(t1, t2) {
		let msg = {
			command: "CHATHISTORY",
			params: ["TARGETS", "timestamp=" + t1, "timestamp=" + t2, 1000],
			params: ["TARGETS", this.formatHistoryAnchor(t1), this.formatHistoryAnchor(t2), 1000],
		};
		return this.fetchBatch(msg, "draft/chathistory-targets").then((batch) => {
			return batch.messages.map((msg) => {
-- 
2.20.1
Details
Message ID
<V-5eTBp0uL4ubT_2bsoiSHS2rcRwplQdufMrXqOsPx9x3T-_zqEGigyCgyp_cJIbE8SiGwRn-JratZ8YIiljRcO-3-x3ww1bY_GHPfsUazA=@emersion.fr>
In-Reply-To
<20211023093721.1412757-1-progval+git@progval.net> (view parent)
DKIM signature
pass
Download raw message
On Saturday, October 23rd, 2021 at 11:37, Valentin Lorentz <progval+git@progval.net> wrote:

> diff --git a/lib/client.js b/lib/client.js
> index c80da50..fe0316a 100644
> --- a/lib/client.js
> +++ b/lib/client.js
> @@ -750,12 +750,12 @@ export default class Client extends EventTarget {
>  			if (limit <= 0) {
>  				throw new Error("Cannot fetch all chat history: too many messages");
>  			}
> -			if (messages.length == max) {
> -				// There are still more messages to fetch
> -				after.time = messages[messages.length - 1].tags.time;
> -				return this.fetchHistoryBetween(target, after, before, limit);
> +			if (messages[0].tags.time <= before) {
> +				return null;

Hm, I'm not sure this is quite correct… I think this makes the loop terminate
immediately. The CHATHISTORY AFTER query will always return messages whose time
tag is before the `before` parameter.

Re: [PATCH gamja 2/2] Add support for msgid-based chathistory

Details
Message ID
<SxsXlXHUDDb26NV4SvpfwaE2YS32vwGMY-kg6G2jnpz15ko3uUkpndU_MdJw9S4jXJJ9ZKG1DlcfF7NatDGYxkDZ2zW1bN1hLBDVxnrt-Yg=@emersion.fr>
In-Reply-To
<20211023093721.1412757-2-progval+git@progval.net> (view parent)
DKIM signature
pass
Download raw message
Thanks a lot for taking the time to send this patch! Below are a few comments.

On Saturday, October 23rd, 2021 at 11:37, Valentin Lorentz <progval+git@progval.net> wrote:

> @@ -387,7 +387,7 @@ export default class App extends Component {
>  				return;
>  			}
>  			let delivery = receipts[type];
> -			if (!delivery || !delivery.time) {
> +			if (!delivery || !delivery.msgid || !delivery.time) {

I think this should be:

   if (!delivery || (!delivery.msgid && !delivery.time))

Otherwise this breaks when the server doesn't support msgids.

>  				return;
>  			}
>  			if (!last || delivery.time > last.time) {
> @@ -594,12 +594,18 @@ export default class App extends Component {
>  		switch (msg.command) {
>  		case irc.RPL_WELCOME:
>  			let lastReceipt = this.latestReceipt(ReceiptType.DELIVERED);
> -			if (lastReceipt && lastReceipt.time && client.enabledCaps["draft/chathistory"] && (!client.enabledCaps["soju.im/bouncer-networks"] || client.params.bouncerNetwork)) {
> +			if (lastReceipt && (lastReceipt.msgid || lastReceipt.time) && client.enabledCaps["draft/chathistory"] && (!client.enabledCaps["soju.im/bouncer-networks"] || client.params.bouncerNetwork)) {
>  				let now = irc.formatDate(new Date());
> -				client.fetchHistoryTargets(now, lastReceipt.time).then((targets) => {
> +				client.fetchHistoryTargets(now, lastReceipt).then((targets) => {
>  					targets.forEach((target) => {
>  						let from = lastReceipt;
> -						let to = { time: msg.tags.time || irc.formatDate(new Date()) };
> +						let to;
> +						if ("msgid" in msg.tags) {
> +							to = { msgid: msg.tags.msgid };
> +						}
> +						else {

Style nit: `}` and `else {` should be on the same line.

(Note: there are other occurences of this later on.)

> +							to = { time: msg.tags.time || irc.formatDate(new Date()) };
> +						}

Nit: this could be simplified to something like:

    let to = { msgid: msg.tags.msgid, time: msg.tags.time || irc.formatDate(new Date()) };

>  						this.fetchBacklog(client, target.name, from, to);
>  					});
>  				});
> @@ -1210,11 +1216,16 @@ export default class App extends Component {
>  			return;
>  		}
>
> -		let before;
> +		let before = {};
>  		if (buf.messages.length > 0) {
> -			before = buf.messages[0].tags["time"];
> +			if (buf.messages[0].tags.msgid) {
> +				before.msgid = buf.messages[0].tags.msgid;
> +			}
> +			if (buf.messages[0].tags.time) {
> +				before.time = buf.messages[0].tags.time;
> +			}

Same here.

>  		} else {
> -			before = irc.formatDate(new Date());
> +			before.time = irc.formatDate(new Date());
>  		}
>
>  		// Avoids sending multiple CHATHISTORY commands in parallel
> diff --git a/lib/client.js b/lib/client.js
> index fe0316a..b98d75e 100644
> --- a/lib/client.js
> +++ b/lib/client.js
> @@ -732,25 +732,46 @@ export default class Client extends EventTarget {
>  		return 100;
>  	}
>
> -	/* Fetch one page of history before the given date. */
> +	formatHistoryAnchor(anchor) {
> +		if (anchor.msgid) {
> +			return "msgid=" + anchor.msgid
> +		}
> +		else if (anchor.time) {
> +			return "timestamp=" + anchor.time
> +		}
> +		else {
> +			throw new Error("Unexpected history anchor: " + anchor);
> +		}
> +	}
> +
> +	/* Fetch one page of history before the given anchor.
> +	 * 'before' must be an object with either a 'msgid' or 'timestamp' attribute. */
>  	fetchHistoryBefore(target, before, limit) {
>  		let max = Math.min(limit, this.chatHistoryPageSize());
> -		let params = ["BEFORE", target, "timestamp=" + before, max];
> +		let params = ["BEFORE", target, this.formatHistoryAnchor(before), max];
>  		return this.roundtripChatHistory(params).then((messages) => {
>  			return { more: messages.length >= max };
>  		});
>  	}
>
> -	/* Fetch history in ascending order. */
> +	/* Fetch history in ascending order between two anchors.
> +	 * 'before' and 'after' must an objects with either a 'msgid' or 'timestamp' attribute. */
>  	fetchHistoryBetween(target, after, before, limit) {
>  		let max = Math.min(limit, this.chatHistoryPageSize());
> -		let params = ["AFTER", target, "timestamp=" + after.time, max];
> +		let params = ["AFTER", target, this.formatHistoryAnchor(after), max];
>  		return this.roundtripChatHistory(params).then((messages) => {
>  			limit -= messages.length;
>  			if (limit <= 0) {
>  				throw new Error("Cannot fetch all chat history: too many messages");
>  			}
> -			if (messages[0].tags.time <= before) {
> +			for (message in messages) {
> +				if (before.msgid) {
> +					if (message.msgid == before.msgid) {
> +						return null;
> +					}
> +				}
> +			}
> +			if (before.time && messages[0].tags.time <= before.time) {
>  				return null;
>  			}

I'd prefer to rewrite this logic to only use msgids if we have them, and only
use timestamps otherwise. Something like:

   if (before.msgid) {
       // msgid logic
   } else {
       // timestamp logic
   }

>  			// There are still more messages to fetch
> @@ -762,7 +783,7 @@ export default class Client extends EventTarget {
>  	fetchHistoryTargets(t1, t2) {
>  		let msg = {
>  			command: "CHATHISTORY",
> -			params: ["TARGETS", "timestamp=" + t1, "timestamp=" + t2, 1000],
> +			params: ["TARGETS", this.formatHistoryAnchor(t1), this.formatHistoryAnchor(t2), 1000],

CHATHISTORY TARGETS always expects timestamps IIRC?

Maybe for consistency we should accept objects as input, but we need to make
sure not to never use msgid criteria here.

>  		};
>  		return this.fetchBatch(msg, "draft/chathistory-targets").then((batch) => {
>  			return batch.messages.map((msg) => {
> --
> 2.20.1
Val Lorentz
Details
Message ID
<5f2d11cb-f97f-29b3-712e-d10cf882c5f0@progval.net>
In-Reply-To
<V-5eTBp0uL4ubT_2bsoiSHS2rcRwplQdufMrXqOsPx9x3T-_zqEGigyCgyp_cJIbE8SiGwRn-JratZ8YIiljRcO-3-x3ww1bY_GHPfsUazA=@emersion.fr> (view parent)
DKIM signature
pass
Download raw message

On 25/10/2021 20:56, Simon Ser wrote:
> On Saturday, October 23rd, 2021 at 11:37, Valentin Lorentz <progval+git@progval.net> wrote:
> 
>> diff --git a/lib/client.js b/lib/client.js
>> index c80da50..fe0316a 100644
>> --- a/lib/client.js
>> +++ b/lib/client.js
>> @@ -750,12 +750,12 @@ export default class Client extends EventTarget {
>>  			if (limit <= 0) {
>>  				throw new Error("Cannot fetch all chat history: too many messages");
>>  			}
>> -			if (messages.length == max) {
>> -				// There are still more messages to fetch
>> -				after.time = messages[messages.length - 1].tags.time;
>> -				return this.fetchHistoryBetween(target, after, before, limit);
>> +			if (messages[0].tags.time <= before) {
>> +				return null;
> 
> Hm, I'm not sure this is quite correct… I think this makes the loop terminate
> immediately. The CHATHISTORY AFTER query will always return messages whose time
> tag is before the `before` parameter.
> 

The function (fetchHistoryBetween) only makes sense when after < before.
On the first iteration of the loop, after <= messages[0].tags.time, and
the value of `messages[0].tags.time` grows as we iterate over batches of
messages, until it reaches `before`.

Re: [PATCH gamja 2/2] Add support for msgid-based chathistory

Val Lorentz
Details
Message ID
<336d41ff-ce11-c111-216f-1844441166f4@progval.net>
In-Reply-To
<SxsXlXHUDDb26NV4SvpfwaE2YS32vwGMY-kg6G2jnpz15ko3uUkpndU_MdJw9S4jXJJ9ZKG1DlcfF7NatDGYxkDZ2zW1bN1hLBDVxnrt-Yg=@emersion.fr> (view parent)
DKIM signature
pass
Download raw message

On 25/10/2021 21:05, Simon Ser wrote:
>> -			if (!delivery || !delivery.time) {
>> +			if (!delivery || !delivery.msgid || !delivery.time) {
> 
> I think this should be:
> 
>    if (!delivery || (!delivery.msgid && !delivery.time))
> 
> Otherwise this breaks when the server doesn't support msgids.

Is this because of some weird type coercion? It seems equivalent to me
otherwise.

>> +			if (before.time && messages[0].tags.time <= before.time) {
>>  				return null;
>>  			}
> 
> I'd prefer to rewrite this logic to only use msgids if we have them, and only
> use timestamps otherwise. Something like:
> 
>    if (before.msgid) {
>        // msgid logic
>    } else {
>        // timestamp logic
>    }
> 
There is a risk of fetching way too much (up to the limit) if the
message is missing from the history from some reason.
Details
Message ID
<TRCDvTouOB8uE71V7xjAO9kEivdnK_XJEKG_HsK4tkb1AK7RKyAfRLlVT2JZqQZ42YmSr-8oMXKNDVBK3fqzhACaW8gbjnnsNsukNrWYOqU=@emersion.fr>
In-Reply-To
<5f2d11cb-f97f-29b3-712e-d10cf882c5f0@progval.net> (view parent)
DKIM signature
pass
Download raw message
On Monday, October 25th, 2021 at 23:32, Val Lorentz <progval@progval.net> wrote:

> On 25/10/2021 20:56, Simon Ser wrote:
> > On Saturday, October 23rd, 2021 at 11:37, Valentin Lorentz <progval+git@progval.net> wrote:
> >
> >> diff --git a/lib/client.js b/lib/client.js
> >> index c80da50..fe0316a 100644
> >> --- a/lib/client.js
> >> +++ b/lib/client.js
> >> @@ -750,12 +750,12 @@ export default class Client extends EventTarget {
> >>  			if (limit <= 0) {
> >>  				throw new Error("Cannot fetch all chat history: too many messages");
> >>  			}
> >> -			if (messages.length == max) {
> >> -				// There are still more messages to fetch
> >> -				after.time = messages[messages.length - 1].tags.time;
> >> -				return this.fetchHistoryBetween(target, after, before, limit);
> >> +			if (messages[0].tags.time <= before) {
> >> +				return null;
> >
> > Hm, I'm not sure this is quite correct… I think this makes the loop terminate
> > immediately. The CHATHISTORY AFTER query will always return messages whose time
> > tag is before the `before` parameter.
> >
>
> The function (fetchHistoryBetween) only makes sense when after < before.

Hmm. I'm a bit confused because `before` is never used in the function. I think
this is the result of an incremental refactoring (we were using BETWEEN and
time comparisons before, but switched away from that). I think the `before`
argument should just be removed, and the function renamed to fetchHistoryAfter.

> On the first iteration of the loop, after <= messages[0].tags.time, and
> the value of `messages[0].tags.time` grows as we iterate over batches of
> messages, until it reaches `before`.

I don't think so: all messages return by CHATHISTORY AFTER timestamp=XXX will
have a timestamp strictly greater than the provided timestamp.

But perhaps a better question is: why is this change needed? What does it fix?
Val Lorentz
Details
Message ID
<db4fa588-2a36-2eb5-9db2-2f1fb5d4f45e@progval.net>
In-Reply-To
<TRCDvTouOB8uE71V7xjAO9kEivdnK_XJEKG_HsK4tkb1AK7RKyAfRLlVT2JZqQZ42YmSr-8oMXKNDVBK3fqzhACaW8gbjnnsNsukNrWYOqU=@emersion.fr> (view parent)
DKIM signature
pass
Download raw message
On 01/11/2021 19:44, Simon Ser wrote:
> Hmm. I'm a bit confused because `before` is never used in the function. I think
> this is the result of an incremental refactoring (we were using BETWEEN and
> time comparisons before, but switched away from that). I think the `before`
> argument should just be removed, and the function renamed to fetchHistoryAfter.
> 
>> On the first iteration of the loop, after <= messages[0].tags.time, and
>> the value of `messages[0].tags.time` grows as we iterate over batches of
>> messages, until it reaches `before`.
> 
> I don't think so: all messages return by CHATHISTORY AFTER timestamp=XXX will
> have a timestamp strictly greater than the provided timestamp.
> 
> But perhaps a better question is: why is this change needed? What does it fix?

Because I don't think it is safe to assume the server will always return
exactly <limit> messages unless we reached the end of the history.

Re: [PATCH gamja 2/2] Add support for msgid-based chathistory

Details
Message ID
<o0Hff1alisxqzYsB6V_KKX4GnGYzAoJDWb_wVJAVBOTmQHS94PavZhz0tdrWTaCoVDg6XMFw3azs61VvD9IPGVs46lxdAgZ89Qa91VIRUcc=@emersion.fr>
In-Reply-To
<336d41ff-ce11-c111-216f-1844441166f4@progval.net> (view parent)
DKIM signature
pass
Download raw message
On Monday, October 25th, 2021 at 23:32, Val Lorentz <progval@progval.net> wrote:

> On 25/10/2021 21:05, Simon Ser wrote:
> >> -			if (!delivery || !delivery.time) {
> >> +			if (!delivery || !delivery.msgid || !delivery.time) {
> >
> > I think this should be:
> >
> >    if (!delivery || (!delivery.msgid && !delivery.time))
> >
> > Otherwise this breaks when the server doesn't support msgids.
>
> Is this because of some weird type coercion? It seems equivalent to me
> otherwise.

Let's say `delivery = { time: "XXX", msgid: null }`. Then your if evaluates to
true (because of `!delivery.msgid`), and mine to false.

> >> +			if (before.time && messages[0].tags.time <= before.time) {
> >>  				return null;
> >>  			}
> >
> > I'd prefer to rewrite this logic to only use msgids if we have them, and only
> > use timestamps otherwise. Something like:
> >
> >    if (before.msgid) {
> >        // msgid logic
> >    } else {
> >        // timestamp logic
> >    }
> >
> There is a risk of fetching way too much (up to the limit) if the
> message is missing from the history from some reason.

Hm, that makes sense, but the whole point of msgid is to avoid timestamp
comparisons which are prone to clock precision issues: if two messages are sent
with exactly the same timestamp then messages will be dropped.

But all of this assumes the previous patch makes sense, if we can drop it then
we can drop this part of the patch as well.
Details
Message ID
<mEWJGZhApTCkyrgk3DE1L2FQ-QLERx0ACqqYjKJgUntqRUN326VNLaA5P1TqeOedjjYphtb5L91KlhCO4dSX5RSNNEA3aMfHsvorQdmy1Zg=@emersion.fr>
In-Reply-To
<db4fa588-2a36-2eb5-9db2-2f1fb5d4f45e@progval.net> (view parent)
DKIM signature
pass
Download raw message
On Monday, November 1st, 2021 at 19:55, Val Lorentz <progval@progval.net> wrote:

> > But perhaps a better question is: why is this change needed? What does it fix?
>
> Because I don't think it is safe to assume the server will always return
> exactly <limit> messages unless we reached the end of the history.

I see. I wouldn't be against changing the check to something like

    if (messages.length === 0) { /* end of history */ }

but this kind of hides the root cause. If a server drops *some* messages, it
might also drop *all* messages, but still have more history.

See also [1]: ergo needs to filter some messages depending on the privileges of
the user, but might end up filtering out all of them in pathological cases. It
might be worth it to add some clarifications to the spec.

[1]: https://github.com/ergochat/ergo/issues/1676
Reply to thread Export thread (mbox)