This thread contains a patchset. You're looking at the original emails,
but you may wish to use the patch review UI.
Review patch
12
4
[PATCH gio 0/5] Multiple optimizations
This is implemented on top of https://lists.sr.ht/~eliasnaur/gio-
patches/patches/33494 change.
This contains bunch of tiny optimizations for functions that are in the
hot path.
Egon Elbre (5):
f32: add Affine2D.IsOffset and GetOffset
internal/ops: use lookup table for NumRefs
internal/ops: avoid some bounds checks in decode
internal/ops: use single table for OpType
internal/ops: optimize Decode
f32/affine.go | 11 ++++
gpu/gpu.go | 10 +---
internal/ops/ops.go | 129 ++++++++++++++++++++---------------------
internal/ops/reader.go | 45 ++++++--------
4 files changed, 92 insertions(+), 103 deletions(-)
--
2.34.2
[PATCH gio 1/5] f32: add Affine2D.IsOffset and GetOffset
From: Egon Elbre <egonelbre@gmail.com>
Signed-off-by: Egon Elbre <egonelbre@gmail.com>
---
f32/affine.go | 11 +++++++++++
gpu/gpu.go | 10 ++ --------
2 files changed, 13 insertions(+), 8 deletions(-)
diff --git a/f32/affine.go b/f32/affine.go
index b056f720..c1bb9b9b 100644
--- a/f32/affine.go
+++ b/f32/affine.go
@@ -121,6 +121,17 @@ func (a *Affine2D) Split() (srs Affine2D, offset Point) {
}, Point{X: a.c, Y: a.f}
}
+ // IsOffset returns whether the transform only contains an offset.
+ func (a *Affine2D) IsOffset() bool {
+ return a.a == 0 && a.b == 0 && a.d == 0 && a.e == 0
+ }
+
+ // GetOffset returns the offset from the affine transform.
+ // It does not take into account any scaling, shearing and rotation.
+ func (a *Affine2D) GetOffset() Point {
+ return Point{X: a.c, Y: a.f}
+ }
+
func (a Affine2D) scale(factor Point) Affine2D {
return Affine2D{
(a.a+1)*factor.X - 1, a.b * factor.X, a.c * factor.X,
diff --git a/gpu/gpu.go b/gpu/gpu.go
index 0977415b..83135416 100644
--- a/gpu/gpu.go
+++ b/gpu/gpu.go
@@ -1338,10 +1338,9 @@ func decodeToOutlineQuads(qs *quadSplitter, tr f32.Affine2D, pathData []byte) {
// create GPU vertices for transformed r, find the bounds and establish texture transform.
func (d *drawOps) boundsForTransformedRect(r f32.Rectangle, tr f32.Affine2D) (aux []byte, bnd f32.Rectangle, ptr f32.Affine2D) {
- if isPureOffset(tr) {
+ if tr.IsOffset() {
// fast-path to allow blitting of pure rectangles
- _, _, ox, _, _, oy := tr.Elems()
- off := f32.Pt(ox, oy)
+ off := tr.GetOffset()
bnd.Min = r.Min.Add(off)
bnd.Max = r.Max.Add(off)
return
@@ -1392,8 +1391,3 @@ func (d *drawOps) boundsForTransformedRect(r f32.Rectangle, tr f32.Affine2D) (au
return
}
-
- func isPureOffset(t f32.Affine2D) bool {
- a, b, _, d, e, _ := t.Elems()
- return a == 1 && b == 0 && d == 0 && e == 1
- }
--
2.34.2
[PATCH gio 2/5] internal/ops: use lookup table for NumRefs
From: Egon Elbre <egonelbre@gmail.com>
Signed-off-by: Egon Elbre <egonelbre@gmail.com>
---
internal/ops/ops.go | 31 +++++++++++++++++++++ ----------
1 file changed, 21 insertions(+), 10 deletions(-)
diff --git a/internal/ops/ops.go b/internal/ops/ops.go
index 2d4d1138..3691c272 100644
--- a/internal/ops/ops.go
+++ b/internal/ops/ops.go
@@ -426,17 +426,28 @@ func (t OpType) Size() int {
return int(opSize[t])
}
+ var opNumRefs = [0x100]byte{
+ TypeKeyFocus: 1,
+ TypePointerInput: 1,
+ TypeProfile: 1,
+ TypeCall: 1,
+ TypeClipboardRead: 1,
+ TypeClipboardWrite: 1,
+ TypeSemanticLabel: 1,
+ TypeSemanticDesc: 1,
+ TypeSelection: 1,
+
+ TypeKeyInput: 2,
+ TypeImage: 2,
+ TypeSource: 2,
+ TypeTarget: 2,
+ TypeSnippet: 2,
+
+ TypeOffer: 3,
+ }
+
func (t OpType) NumRefs() int {
- switch t {
- case TypeKeyFocus, TypePointerInput, TypeProfile, TypeCall, TypeClipboardRead, TypeClipboardWrite, TypeSemanticLabel, TypeSemanticDesc, TypeSelection:
- return 1
- case TypeKeyInput, TypeImage, TypeSource, TypeTarget, TypeSnippet:
- return 2
- case TypeOffer:
- return 3
- default:
- return 0
- }
+ return int(opNumRefs[t])
}
func (t OpType) String() string {
--
2.34.2
[PATCH gio 3/5] internal/ops: avoid some bounds checks in decode
From: Egon Elbre <egonelbre@gmail.com>
Signed-off-by: Egon Elbre <egonelbre@gmail.com>
---
internal/ops/ops.go | 3 ++ -
internal/ops/reader.go | 6 +++ ---
2 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/internal/ops/ops.go b/internal/ops/ops.go
index 3691c272..c68a51ee 100644
--- a/internal/ops/ops.go
+++ b/internal/ops/ops.go
@@ -163,9 +163,10 @@ const (
)
func (op *ClipOp) Decode(data []byte) {
- if OpType(data[0]) != TypeClip {
+ if len(data) < TypeClipLen || OpType(data[0]) != TypeClip {
panic("invalid op")
}
+ data = data[:TypeClipLen]
bo := binary.LittleEndian
r := image.Rectangle{
Min: image.Point{
diff --git a/internal/ops/reader.go b/internal/ops/reader.go
index 63ec9cc3..b7f0f006 100644
--- a/internal/ops/reader.go
+++ b/internal/ops/reader.go
@@ -160,7 +160,7 @@ func (r *Reader) Decode() (EncodedOp, bool) {
}
func (op *opMacroDef) decode(data []byte) {
- if OpType(data[0]) != TypeMacro {
+ if len(data) < TypeMacroLen || OpType(data[0]) != TypeMacro {
panic("invalid op")
}
bo := binary.LittleEndian
@@ -176,11 +176,11 @@ func (op *opMacroDef) decode(data []byte) {
}
func (m *macroOp) decode(data []byte, refs []interface{}) {
- if OpType(data[0]) != TypeCall {
+ if len(data) < TypeCallLen || len(refs) < 1 || OpType(data[0]) != TypeCall {
panic("invalid op")
}
- data = data[:TypeCallLen]
bo := binary.LittleEndian
+ data = data[:TypeCallLen]
*m = macroOp{
ops: refs[0].(*Ops),
start: PC{
--
2.34.2
[PATCH gio 4/5] internal/ops: use single table for OpType
From: Egon Elbre <egonelbre@gmail.com>
Size and NumRefs are always used together, so consolidate info to
a single table to avoid two separate lookups.
Signed-off-by: Egon Elbre <egonelbre@gmail.com>
---
internal/ops/ops.go | 116 +++++++++++++++++++ ----------------------
internal/ops/reader.go | 12 ++ ---
2 files changed, 60 insertions(+), 68 deletions(-)
diff --git a/internal/ops/ops.go b/internal/ops/ops.go
index c68a51ee..a998f1cf 100644
--- a/internal/ops/ops.go
+++ b/internal/ops/ops.go
@@ -381,74 +381,66 @@ func DecodeLoad(data []byte) int {
return int(bo.Uint32(data[1:]))
}
- var opSize = [0x100]byte{
- TypeMacro: TypeMacroLen,
- TypeCall: TypeCallLen,
- TypeDefer: TypeDeferLen,
- TypePushTransform: TypePushTransformLen,
- TypeTransform: TypeTransformLen,
- TypePopTransform: TypePopTransformLen,
- TypeInvalidate: TypeRedrawLen,
- TypeImage: TypeImageLen,
- TypePaint: TypePaintLen,
- TypeColor: TypeColorLen,
- TypeLinearGradient: TypeLinearGradientLen,
- TypePass: TypePassLen,
- TypePopPass: TypePopPassLen,
- TypePointerInput: TypePointerInputLen,
- TypeClipboardRead: TypeClipboardReadLen,
- TypeClipboardWrite: TypeClipboardWriteLen,
- TypeSource: TypeSourceLen,
- TypeTarget: TypeTargetLen,
- TypeOffer: TypeOfferLen,
- TypeKeyInput: TypeKeyInputLen,
- TypeKeyFocus: TypeKeyFocusLen,
- TypeKeySoftKeyboard: TypeKeySoftKeyboardLen,
- TypeSave: TypeSaveLen,
- TypeLoad: TypeLoadLen,
- TypeAux: TypeAuxLen,
- TypeClip: TypeClipLen,
- TypePopClip: TypePopClipLen,
- TypeProfile: TypeProfileLen,
- TypeCursor: TypeCursorLen,
- TypePath: TypePathLen,
- TypeStroke: TypeStrokeLen,
- TypeSemanticLabel: TypeSemanticLabelLen,
- TypeSemanticDesc: TypeSemanticDescLen,
- TypeSemanticClass: TypeSemanticClassLen,
- TypeSemanticSelected: TypeSemanticSelectedLen,
- TypeSemanticDisabled: TypeSemanticDisabledLen,
- TypeSnippet: TypeSnippetLen,
- TypeSelection: TypeSelectionLen,
- TypeActionInput: TypeActionInputLen,
+ type opProp struct {
+ Size byte
+ NumRefs byte
+ }
+
+ var opProps = [0x100]opProp{
+ TypeMacro: {Size: TypeMacroLen, NumRefs: 0},
+ TypeCall: {Size: TypeCallLen, NumRefs: 1},
+ TypeDefer: {Size: TypeDeferLen, NumRefs: 0},
+ TypePushTransform: {Size: TypePushTransformLen, NumRefs: 0},
+ TypeTransform: {Size: TypeTransformLen, NumRefs: 0},
+ TypePopTransform: {Size: TypePopTransformLen, NumRefs: 0},
+ TypeInvalidate: {Size: TypeRedrawLen, NumRefs: 0},
+ TypeImage: {Size: TypeImageLen, NumRefs: 2},
+ TypePaint: {Size: TypePaintLen, NumRefs: 0},
+ TypeColor: {Size: TypeColorLen, NumRefs: 0},
+ TypeLinearGradient: {Size: TypeLinearGradientLen, NumRefs: 0},
+ TypePass: {Size: TypePassLen, NumRefs: 0},
+ TypePopPass: {Size: TypePopPassLen, NumRefs: 0},
+ TypePointerInput: {Size: TypePointerInputLen, NumRefs: 1},
+ TypeClipboardRead: {Size: TypeClipboardReadLen, NumRefs: 1},
+ TypeClipboardWrite: {Size: TypeClipboardWriteLen, NumRefs: 1},
+ TypeSource: {Size: TypeSourceLen, NumRefs: 2},
+ TypeTarget: {Size: TypeTargetLen, NumRefs: 2},
+ TypeOffer: {Size: TypeOfferLen, NumRefs: 3},
+ TypeKeyInput: {Size: TypeKeyInputLen, NumRefs: 2},
+ TypeKeyFocus: {Size: TypeKeyFocusLen, NumRefs: 1},
+ TypeKeySoftKeyboard: {Size: TypeKeySoftKeyboardLen, NumRefs: 0},
+ TypeSave: {Size: TypeSaveLen, NumRefs: 0},
+ TypeLoad: {Size: TypeLoadLen, NumRefs: 0},
+ TypeAux: {Size: TypeAuxLen, NumRefs: 0},
+ TypeClip: {Size: TypeClipLen, NumRefs: 0},
+ TypePopClip: {Size: TypePopClipLen, NumRefs: 0},
+ TypeProfile: {Size: TypeProfileLen, NumRefs: 1},
+ TypeCursor: {Size: TypeCursorLen, NumRefs: 0},
+ TypePath: {Size: TypePathLen, NumRefs: 0},
+ TypeStroke: {Size: TypeStrokeLen, NumRefs: 0},
+ TypeSemanticLabel: {Size: TypeSemanticLabelLen, NumRefs: 1},
+ TypeSemanticDesc: {Size: TypeSemanticDescLen, NumRefs: 1},
+ TypeSemanticClass: {Size: TypeSemanticClassLen, NumRefs: 0},
+ TypeSemanticSelected: {Size: TypeSemanticSelectedLen, NumRefs: 0},
+ TypeSemanticDisabled: {Size: TypeSemanticDisabledLen, NumRefs: 0},
+ TypeSnippet: {Size: TypeSnippetLen, NumRefs: 2},
+ TypeSelection: {Size: TypeSelectionLen, NumRefs: 1},
+ TypeActionInput: {Size: TypeActionInputLen, NumRefs: 0},
+ }
+
+ var opSize = [0x100]byte{}
+
+ func (t OpType) props() (size, numRefs int) {
+ v := opProps[t]
+ return int(v.Size), int(v.NumRefs)
}
func (t OpType) Size() int {
- return int(opSize[t])
- }
-
- var opNumRefs = [0x100]byte{
- TypeKeyFocus: 1,
- TypePointerInput: 1,
- TypeProfile: 1,
- TypeCall: 1,
- TypeClipboardRead: 1,
- TypeClipboardWrite: 1,
- TypeSemanticLabel: 1,
- TypeSemanticDesc: 1,
- TypeSelection: 1,
-
- TypeKeyInput: 2,
- TypeImage: 2,
- TypeSource: 2,
- TypeTarget: 2,
- TypeSnippet: 2,
-
- TypeOffer: 3,
+ return int(opProps[t].Size)
}
func (t OpType) NumRefs() int {
- return int(opNumRefs[t])
+ return int(opProps[t].NumRefs)
}
func (t OpType) String() string {
diff --git a/internal/ops/reader.go b/internal/ops/reader.go
index b7f0f006..fbfc7f70 100644
--- a/internal/ops/reader.go
+++ b/internal/ops/reader.go
@@ -54,9 +54,10 @@ type opMacroDef struct {
}
func (pc PC) Add(op OpType) PC {
+ size, numRefs := op.props()
return PC{
- data: pc.data + op.Size(),
- refs: pc.refs + op.NumRefs(),
+ data: pc.data + size,
+ refs: pc.refs + numRefs,
}
}
@@ -104,8 +105,7 @@ func (r *Reader) Decode() (EncodedOp, bool) {
}
key := Key{ops: r.ops, pc: r.pc.data, version: r.ops.version}
t := OpType(data[0])
- n := t.Size()
- nrefs := t.NumRefs()
+ n, nrefs := t.props()
data = data[:n]
refs = refs[r.pc.refs:]
refs = refs[:nrefs]
@@ -125,10 +125,10 @@ func (r *Reader) Decode() (EncodedOp, bool) {
if deferring {
deferring = false
// Copy macro for deferred execution.
- if t.NumRefs() != 1 {
+ if nrefs != 1 {
panic("internal error: unexpected number of macro refs")
}
- deferData := Write1(&r.deferOps, t.Size(), refs[0])
+ deferData := Write1(&r.deferOps, n, refs[0])
copy(deferData, data)
r.pc.data += n
r.pc.refs += nrefs
--
2.34.2
[PATCH gio 5/5] internal/ops: optimize Decode
From: Egon Elbre <egonelbre@gmail.com>
Using clean struct creation creates a lot of temporary variables in
assembly. Inline the assignments, which generates less code.
Signed-off-by: Egon Elbre <egonelbre@gmail.com>
---
internal/ops/ops.go | 21 ++++++ ---------------
internal/ops/reader.go | 27 ++++++++ -------------------
2 files changed, 14 insertions(+), 34 deletions(-)
diff --git a/internal/ops/ops.go b/internal/ops/ops.go
index a998f1cf..edaac6e7 100644
--- a/internal/ops/ops.go
+++ b/internal/ops/ops.go
@@ -168,21 +168,12 @@ func (op *ClipOp) Decode(data []byte) {
}
data = data[:TypeClipLen]
bo := binary.LittleEndian
- r := image.Rectangle{
- Min: image.Point{
- X: int(int32(bo.Uint32(data[1:]))),
- Y: int(int32(bo.Uint32(data[5:]))),
- },
- Max: image.Point{
- X: int(int32(bo.Uint32(data[9:]))),
- Y: int(int32(bo.Uint32(data[13:]))),
- },
- }
- *op = ClipOp{
- Bounds: r,
- Outline: data[17] == 1,
- Shape: Shape(data[18]),
- }
+ op.Bounds.Min.X = int(int32(bo.Uint32(data[1:])))
+ op.Bounds.Min.Y = int(int32(bo.Uint32(data[5:])))
+ op.Bounds.Max.X = int(int32(bo.Uint32(data[9:])))
+ op.Bounds.Max.Y = int(int32(bo.Uint32(data[13:])))
+ op.Outline = data[17] == 1
+ op.Shape = Shape(data[18])
}
func Reset(o *Ops) {
diff --git a/internal/ops/reader.go b/internal/ops/reader.go
index fbfc7f70..7e212dfb 100644
--- a/internal/ops/reader.go
+++ b/internal/ops/reader.go
@@ -165,14 +165,8 @@ func (op *opMacroDef) decode(data []byte) {
}
bo := binary.LittleEndian
data = data[:TypeMacroLen]
- dataIdx := int(int32(bo.Uint32(data[1:])))
- refsIdx := int(int32(bo.Uint32(data[5:])))
- *op = opMacroDef{
- endpc: PC{
- data: dataIdx,
- refs: refsIdx,
- },
- }
+ op.endpc.data = int(int32(bo.Uint32(data[1:])))
+ op.endpc.refs = int(int32(bo.Uint32(data[5:])))
}
func (m *macroOp) decode(data []byte, refs []interface{}) {
@@ -181,15 +175,10 @@ func (m *macroOp) decode(data []byte, refs []interface{}) {
}
bo := binary.LittleEndian
data = data[:TypeCallLen]
- *m = macroOp{
- ops: refs[0].(*Ops),
- start: PC{
- data: int(int32(bo.Uint32(data[1:]))),
- refs: int(int32(bo.Uint32(data[5:]))),
- },
- end: PC{
- data: int(int32(bo.Uint32(data[9:]))),
- refs: int(int32(bo.Uint32(data[13:]))),
- },
- }
+
+ m.ops = refs[0].(*Ops)
+ m.start.data = int(int32(bo.Uint32(data[1:])))
+ m.start.refs = int(int32(bo.Uint32(data[5:])))
+ m.end.data = int(int32(bo.Uint32(data[9:])))
+ m.end.refs = int(int32(bo.Uint32(data[13:])))
}
--
2.34.2
[gio/patches] build failed
Re: [PATCH gio 1/5] f32: add Affine2D.IsOffset and GetOffset
I assume this is for performance, right? If so, why isn't the Go compiler clever
enough to see inline Elem?
On Sat Jul 2, 2022 at 2:11 PM CEST, ~egonelbre wrote:
> From: Egon Elbre <egonelbre@gmail.com >
>
> Signed-off-by: Egon Elbre <egonelbre@gmail.com >
> ---
> f32/affine.go | 11 +++++++++++
> gpu/gpu.go | 10 ++--------
> 2 files changed, 13 insertions(+), 8 deletions(-)
>
> diff --git a/f32/affine.go b/f32/affine.go
> index b056f720..c1bb9b9b 100644
> --- a/f32/affine.go
> +++ b/f32/affine.go
> @@ -121,6 +121,17 @@ func (a *Affine2D) Split() (srs Affine2D, offset Point) {
> }, Point{X: a.c, Y: a.f}
> }
>
> +// IsOffset returns whether the transform only contains an offset.
> +func (a *Affine2D) IsOffset() bool {
> + return a.a == 0 && a.b == 0 && a.d == 0 && a.e == 0
> +}
> +
> +// GetOffset returns the offset from the affine transform.
> +// It does not take into account any scaling, shearing and rotation.
> +func (a *Affine2D) GetOffset() Point {
Two exported methods for what is arguably a Go compiler weakness is a bit much. Can
Split be generalized? Or else
func (a *Affine2D) SplitOffset() (f32.Point, bool)
?
> + return Point{X: a.c, Y: a.f}
> +}
> +
> func (a Affine2D) scale(factor Point) Affine2D {
> return Affine2D{
> (a.a+1)*factor.X - 1, a.b * factor.X, a.c * factor.X,
> diff --git a/gpu/gpu.go b/gpu/gpu.go
> index 0977415b..83135416 100644
> --- a/gpu/gpu.go
> +++ b/gpu/gpu.go
> @@ -1338,10 +1338,9 @@ func decodeToOutlineQuads(qs *quadSplitter, tr f32.Affine2D, pathData []byte) {
>
> // create GPU vertices for transformed r, find the bounds and establish texture transform.
> func (d *drawOps) boundsForTransformedRect(r f32.Rectangle, tr f32.Affine2D) (aux []byte, bnd f32.Rectangle, ptr f32.Affine2D) {
> - if isPureOffset(tr) {
> + if tr.IsOffset() {
> // fast-path to allow blitting of pure rectangles
> - _, _, ox, _, _, oy := tr.Elems()
> - off := f32.Pt(ox, oy)
> + off := tr.GetOffset()
> bnd.Min = r.Min.Add(off)
> bnd.Max = r.Max.Add(off)
> return
> @@ -1392,8 +1391,3 @@ func (d *drawOps) boundsForTransformedRect(r f32.Rectangle, tr f32.Affine2D) (au
>
> return
> }
> -
> -func isPureOffset(t f32.Affine2D) bool {
> - a, b, _, d, e, _ := t.Elems()
> - return a == 1 && b == 0 && d == 0 && e == 1
> -}
> --
> 2.34.2
Re: [PATCH gio 2/5] internal/ops: use lookup table for NumRefs
Re: [PATCH gio 3/5] internal/ops: avoid some bounds checks in decode
Re: [PATCH gio 4/5] internal/ops: use single table for OpType
Thanks. Merged with a single nit.
Elias
On Sat Jul 2, 2022 at 2:49 PM CEST, ~egonelbre wrote:
> From: Egon Elbre <egonelbre@gmail.com >
> +
> +var opSize = [0x100]byte{}
> +
Removed; was unused.
Re: [PATCH gio 5/5] internal/ops: optimize Decode
Merged, although unfortunate. Is there a Go issue for tracking this kind of
inefficiency?
Elias
Re: [PATCH gio 5/5] internal/ops: optimize Decode
Not that I know of, I was mainly focused on making the compiled output
nicer at the moment.
+ Egon
+ Egon
On Sat, Jul 2, 2022 at 9:19 PM Elias Naur <mail@eliasnaur.com > wrote:
>
> Merged, although unfortunate. Is there a Go issue for tracking this kind of
> inefficiency?
>
> Elias