From: Jack Mordaunt <jackmordaunt@gmail.com> Jack Mordaunt (4): doc: explain store methods feat: add helpers refactor: make `CopyInto` optional feat: Orchard store builder.go | 17 +- fields/primitives.go | 28 ++- go.mod | 6 +- go.sum | 6 +- grove/child-cache.go | 3 + grove/grove.go | 13 +- nodes.go | 27 ++- orchard/cursor.go | 87 ++++++++ orchard/index.go | 58 +++++ orchard/orchard.go | 428 +++++++++++++++++++++++++++++++++++ store.go | 15 +- store/archive.go | 5 +- store/cache-store.go | 11 +- store/memory-store.go | 7 +- store/store_test.go | 495 +++++++++++++++++++++++++++++++++++++++-- testkeys/testkeys.go | 15 ++ testutil/node_utils.go | 34 +++ 17 files changed, 1199 insertions(+), 56 deletions(-) create mode 100644 orchard/cursor.go create mode 100644 orchard/index.go create mode 100644 orchard/orchard.go -- 2.30.0.windows.1
Copy & paste the following snippet into your terminal to import this patchset into git:
curl -s https://lists.sr.ht/~whereswaldon/arbor-dev/patches/21785/mbox | git am -3

Learn more about email & git
From: Jack Mordaunt <jackmordaunt@gmail.com> --- store.go | 10 +++++++++- store/memory-store.go | 7 ++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/store.go b/store.go index 716be7f..82a206e 100644 --- a/store.go +++ b/store.go @@ -4,13 +4,21 @@ import ( "git.sr.ht/~whereswaldon/forest-go/fields" ) +// Store describes a collection of `forest.Node`. type Store interface { + // CopyInto allows transformations between Store instances. CopyInto(Store) error + // Get retrieves a node by ID. Get(*fields.QualifiedHash) (Node, bool, error) + // GetIdentity retrieves an identity node by ID. GetIdentity(*fields.QualifiedHash) (Node, bool, error) + // GetCommunity retrieves a community node by ID. GetCommunity(*fields.QualifiedHash) (Node, bool, error) + // GetConversation retrieves a conversation node by ID. GetConversation(communityID, conversationID *fields.QualifiedHash) (Node, bool, error) + // GetReply retrieves a reply node by ID. GetReply(communityID, conversationID, replyID *fields.QualifiedHash) (Node, bool, error) + // Children returns a list of child nodes for the given node ID. Children(*fields.QualifiedHash) ([]*fields.QualifiedHash, error) // Recent returns recently-created (as per the timestamp in the node) nodes. // It may return both a slice of nodes and an error if some nodes in the @@ -19,6 +27,6 @@ type Store interface { // Add inserts a node into the store. It is *not* an error to insert a node which is already // stored. Implementations must not return an error in this case. Add(Node) error - + // RemoveSubtree from the store. RemoveSubtree(*fields.QualifiedHash) error } diff --git a/store/memory-store.go b/store/memory-store.go index 0e23ae5..ae35d73 100644 --- a/store/memory-store.go +++ b/store/memory-store.go @@ -8,13 +8,18 @@ import ( "git.sr.ht/~whereswaldon/forest-go/fields" ) +// MemoryStore is an in-memory node store. type MemoryStore struct { - Items map[string]forest.Node + // Items is a flat map of all nodes. 
+ Items map[string]forest.Node + // ChildMap describes the parent-child relationship between nodes. + // Each list of child nodes is keyed by the parent node. ChildMap map[string][]string } var _ forest.Store = &MemoryStore{} +// NewMemoryStore allocates a MemoryStore. func NewMemoryStore() *MemoryStore { return &MemoryStore{ Items: make(map[string]forest.Node), -- 2.30.0.windows.1
From: Jack Mordaunt <jackmordaunt@gmail.com> - `forest.Builder` time generation can be stubbed out for testing - nodes can be queried for node type - mock constructors for nodes that don't require a testing context Signed-off-by: Jack Mordaunt <jackmordaunt@gmail.com> --- builder.go | 17 +++++++++++++---- nodes.go | 27 ++++++++++++++++++++------- testkeys/testkeys.go | 15 +++++++++++++++ testutil/node_utils.go | 34 ++++++++++++++++++++++++++++++++++ 4 files changed, 82 insertions(+), 11 deletions(-) diff --git a/builder.go b/builder.go index 2058ab0..8a0a466 100644 --- a/builder.go @@ -212,7 +212,7 @@ func NewIdentityQualified(signer Signer, name *fields.QualifiedContent, metadata if err != nil { return nil, err } - identity.id = fields.Blob(id) + identity.Identifier = fields.Blob(id) return identity, nil } @@ -221,6 +221,7 @@ func NewIdentityQualified(signer Signer, name *fields.QualifiedContent, metadata type Builder struct { User *Identity Signer + Timer func() time.Time } // As creates a Builder that can write new nodes on behalf of the provided user. @@ -231,9 +232,17 @@ func As(user *Identity, signer Signer) *Builder { return &Builder{ User: user, Signer: signer, + Timer: func() time.Time { return time.Now() }, } } +func (b *Builder) Now() time.Time { + if b.Timer != nil { + return b.Timer() + } + return time.Now() +} + // NewCommunity creates a community node (signed by the given identity with the given privkey). 
func (n *Builder) NewCommunity(name string, metadata []byte) (*Community, error) { qname, err := fields.NewQualifiedContent(fields.ContentTypeUTF8String, []byte(name)) @@ -288,7 +297,7 @@ func (n *Builder) NewCommunityQualified(name *fields.QualifiedContent, metadata if err != nil { return nil, err } - c.id = fields.Blob(id) + c.Identifier = fields.Blob(id) return c, nil } @@ -310,7 +319,7 @@ func (n *Builder) NewReplyQualified(parent interface{}, content, metadata *field r := newReply() r.Version = fields.CurrentVersion r.Type = fields.NodeTypeReply - r.Created = fields.TimestampFrom(time.Now()) + r.Created = fields.TimestampFrom(n.Now()) switch concreteParent := parent.(type) { case *Community: r.CommunityID = *concreteParent.ID() @@ -360,7 +369,7 @@ func (n *Builder) NewReplyQualified(parent interface{}, content, metadata *field if err != nil { return nil, err } - r.id = fields.Blob(id) + r.Identifier = fields.Blob(id) return r, nil } diff --git a/nodes.go b/nodes.go index 2fc1566..ba096bf 100644 --- a/nodes.go +++ b/nodes.go @@ -31,6 +31,19 @@ type Node interface { encoding.BinaryUnmarshaler } +// Is queries the node for it's concrete type. +func Is(nt fields.NodeType, n Node) bool { + switch n.(type) { + case *Identity: + return nt == fields.NodeTypeIdentity + case *Community: + return nt == fields.NodeTypeCommunity + case *Reply: + return nt == fields.NodeTypeReply + } + return false +} + // NodeTypeOf returns the NodeType of the provided binary-marshaled node. // If the provided bytes are not a forest node or the type cannot be determined, // an error will be returned and the first return value must be ignored. @@ -75,8 +88,8 @@ type SchemaInfo struct { // generic node type CommonNode struct { - // the ID is deterministically computed from the rest of the values - id fields.Blob + // Deterministically computed from the rest of the values. 
+ Identifier fields.Blob SchemaInfo `arbor:"order=0,recurse=always"` Parent fields.QualifiedHash `arbor:"order=1,recurse=serialize"` IDDesc fields.HashDescriptor `arbor:"order=2,recurse=always"` @@ -90,7 +103,7 @@ type CommonNode struct { func (n CommonNode) ID() *fields.QualifiedHash { return &fields.QualifiedHash{ Descriptor: n.IDDesc, - Blob: n.id, + Blob: n.Identifier, } } @@ -99,7 +112,7 @@ func (n CommonNode) CreatedAt() time.Time { } func (n CommonNode) ParentID() *fields.QualifiedHash { - return &fields.QualifiedHash{n.Parent.Descriptor, n.Parent.Blob} + return &fields.QualifiedHash{Descriptor: n.Parent.Descriptor, Blob: n.Parent.Blob} } func (n CommonNode) TreeDepth() fields.TreeDepth { @@ -258,7 +271,7 @@ func (i *Identity) UnmarshalBinary(b []byte) error { if err != nil { return err } - i.id, err = computeID(i) + i.Identifier, err = computeID(i) return err } @@ -340,7 +353,7 @@ func (c *Community) UnmarshalBinary(b []byte) error { if err != nil { return err } - c.id, err = computeID(c) + c.Identifier, err = computeID(c) return err } @@ -428,7 +441,7 @@ func (r *Reply) UnmarshalBinary(b []byte) error { if err != nil { return err } - r.id, err = computeID(r) + r.Identifier, err = computeID(r) return err } diff --git a/testkeys/testkeys.go b/testkeys/testkeys.go index f2be17c..e649014 100644 --- a/testkeys/testkeys.go +++ b/testkeys/testkeys.go @@ -6,6 +6,7 @@ package testkeys import ( "bytes" + "fmt" "testing" "git.sr.ht/~whereswaldon/forest-go" @@ -43,6 +44,20 @@ func Signer(t *testing.T, privKey string) forest.Signer { return signer } +// MockSigner creates a fake signer for use in testing. +// Panics on error. 
+func MockSigner(privKey string) forest.Signer { + privkey, err := getKey(privKey, TestKeyPassphrase) + if err != nil { + panic(fmt.Errorf("creating private key: %v", err)) + } + signer, err := forest.NewNativeSigner(privkey) + if err != nil { + panic(fmt.Errorf("creating signer: %v", err)) + } + return signer +} + const TestKeyPassphrase = "pleasedonotusethisasapassword" const PrivKey1 = `-----BEGIN PGP PRIVATE KEY BLOCK----- diff --git a/testutil/node_utils.go b/testutil/node_utils.go index c6f1eab..80f6d24 100644 --- a/testutil/node_utils.go +++ b/testutil/node_utils.go @@ -5,6 +5,7 @@ content. package testutil import ( + "fmt" "testing" "git.sr.ht/~whereswaldon/forest-go" @@ -64,3 +65,36 @@ func RandomNodeSlice(length int, t *testing.T) ([]*fields.QualifiedHash, []fores } return ids, nodes } + +// MockIdentity creates a fake identity with a random name for the purposes of +// testing. +func MockIdentity() (*forest.Identity, forest.Signer) { + signer := testkeys.MockSigner(testkeys.PrivKey1) + identity, err := forest.NewIdentity(signer, RandomString(12), []byte{}) + if err != nil { + panic(fmt.Errorf("mocking up identity: %v", err)) + } + return identity, signer +} + +// MockCommunity creates a fake community for the purposes of testing. +func MockCommunity(identity *forest.Builder, name string) *forest.Community { + c, err := identity.NewCommunity(name, []byte{}) + if err != nil { + panic(fmt.Errorf("mocking up community: %v", err)) + } + return c +} + +// MockReply creates a fake reply with random content for the given identity and +// community. +func MockReply( + identity *forest.Builder, + parent forest.Node, +) *forest.Reply { + r, err := identity.NewReply(parent, RandomString(128), []byte{}) + if err != nil { + panic(fmt.Errorf("mocking up reply: %v", err)) + } + return r +} -- 2.30.0.windows.1
From: Jack Mordaunt <jackmordaunt@gmail.com> Various store types weren't implementing this proper, so that indicates it's not necessary for the interface. Signed-off-by: Jack Mordaunt <jackmordaunt@gmail.com> --- store.go | 7 +++++-- store/archive.go | 5 +++-- store/cache-store.go | 11 ++++++++--- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/store.go b/store.go index 82a206e..efa6ecb 100644 --- a/store.go +++ b/store.go @@ -6,8 +6,6 @@ import ( // Store describes a collection of `forest.Node`. type Store interface { - // CopyInto allows transformations between Store instances. - CopyInto(Store) error // Get retrieves a node by ID. Get(*fields.QualifiedHash) (Node, bool, error) // GetIdentity retrieves an identity node by ID. @@ -30,3 +28,8 @@ type Store interface { // RemoveSubtree from the store. RemoveSubtree(*fields.QualifiedHash) error } + +// Copiable stores can copy themselves into another store. +type Copiable interface { + CopyInto(Store) error +} diff --git a/store/archive.go b/store/archive.go index 57ee6cb..4a6618a 100644 --- a/store/archive.go +++ b/store/archive.go @@ -120,7 +120,9 @@ func (m *Archive) unsubscribeInMap(targetMap map[Subscription]func(forest.Node), func (m *Archive) CopyInto(s forest.Store) (err error) { m.executeAsync(func() { - err = m.store.CopyInto(s) + if c, ok := m.store.(forest.Copiable); ok { + err = c.CopyInto(s) + } }) return } @@ -131,7 +133,6 @@ func (m *Archive) Get(id *fields.QualifiedHash) (node forest.Node, present bool, }) return } - func (m *Archive) GetIdentity(id *fields.QualifiedHash) (node forest.Node, present bool, err error) { m.executeAsync(func() { node, present, err = m.store.GetIdentity(id) diff --git a/store/cache-store.go b/store/cache-store.go index bd9f8d8..1db0090 100644 --- a/store/cache-store.go +++ b/store/cache-store.go @@ -26,8 +26,10 @@ var _ forest.Store = &CacheStore{} // fast in-memory implementations as the `cache` layer and disk or // network-based implementations as the 
`base` layer. func NewCacheStore(cache, back forest.Store) (*CacheStore, error) { - if err := cache.CopyInto(back); err != nil { - return nil, err + if c, ok := cache.(forest.Copiable); ok { + if err := c.CopyInto(back); err != nil { + return nil, err + } } return &CacheStore{cache, back}, nil } @@ -40,7 +42,10 @@ func (m *CacheStore) Get(id *fields.QualifiedHash) (forest.Node, bool, error) { } func (m *CacheStore) CopyInto(other forest.Store) error { - return m.Back.CopyInto(other) + if c, ok := m.Back.(forest.Copiable); ok { + return c.CopyInto(other) + } + return fmt.Errorf("cannot copy store: Copiable interface not implemented %T", m.Back) } // Add inserts the given node into both stores of the CacheStore -- 2.30.0.windows.1
From: Jack Mordaunt <jackmordaunt@gmail.com> fix: return nil as the consistent behaviour Testing failed when comparing an empty slice to a nil slice. test(loadStore): rm unused memory store style(spelling): fix typos test: ensure empty store returns nil node perf(orchard.Using): init all buckets inside single tx fix(orchard.Open): tighten file permissions for unix systems refactor(byteorder): use a single byteorder definition throughout codebase doc(indexCursor): explain how iteration works refactor(indexCursor): drop abstract Map method fix(indexCursor.First): return nil when index is nil Without this check, calling orchard.Recent before adding a node will cause a panic. This occurs because the index's sub-bucket doesn't get created, until a node is added. This occurs in Sprig, where Recent is called before Add. Signed-off-by: Jack Mordaunt <jackmordaunt@gmail.com> --- fields/primitives.go | 28 +-- go.mod | 6 +- go.sum | 6 +- grove/child-cache.go | 3 + grove/grove.go | 13 +- orchard/cursor.go | 87 ++++++++ orchard/index.go | 58 +++++ orchard/orchard.go | 428 +++++++++++++++++++++++++++++++++++++ store/store_test.go | 495 +++++++++++++++++++++++++++++++++++++++++-- 9 files changed, 1087 insertions(+), 37 deletions(-) create mode 100644 orchard/cursor.go create mode 100644 orchard/index.go create mode 100644 orchard/orchard.go diff --git a/fields/primitives.go b/fields/primitives.go index 9782b70..74e9f3e 100644 --- a/fields/primitives.go +++ b/fields/primitives.go @@ -18,9 +18,9 @@ const ( HashDigestLengthSHA512_256 ContentLength = 32 ) -// multiByteSerializationOrder defines the order in which multi-byte +// MultiByteSerializationOrder defines the order in which multi-byte // integers are serialized into binary -var multiByteSerializationOrder binary.ByteOrder = binary.BigEndian +var MultiByteSerializationOrder binary.ByteOrder = binary.BigEndian // fundamental types type genericType uint8 @@ -29,13 +29,13 @@ const sizeofgenericType = 1 func (g genericType) 
MarshalBinary() ([]byte, error) { b := new(bytes.Buffer) - err := binary.Write(b, multiByteSerializationOrder, g) + err := binary.Write(b, MultiByteSerializationOrder, g) return b.Bytes(), err } func (g *genericType) UnmarshalBinary(b []byte) error { buf := bytes.NewBuffer(b) - return binary.Read(buf, multiByteSerializationOrder, g) + return binary.Read(buf, MultiByteSerializationOrder, g) } func (g *genericType) BytesConsumed() int { @@ -68,7 +68,7 @@ func NewContentLength(size int) (*ContentLength, error) { // MarshalBinary converts the ContentLength into its binary representation func (c ContentLength) MarshalBinary() ([]byte, error) { b := new(bytes.Buffer) - err := binary.Write(b, multiByteSerializationOrder, c) + err := binary.Write(b, MultiByteSerializationOrder, c) return b.Bytes(), err } @@ -90,7 +90,7 @@ func (c *ContentLength) UnmarshalText(b []byte) error { // back to its structured form func (c *ContentLength) UnmarshalBinary(b []byte) error { buf := bytes.NewBuffer(b) - return binary.Read(buf, multiByteSerializationOrder, c) + return binary.Read(buf, MultiByteSerializationOrder, c) } func (c *ContentLength) BytesConsumed() int { @@ -109,7 +109,7 @@ const sizeofTreeDepth = 4 // MarshalBinary converts the TreeDepth into its binary representation func (t TreeDepth) MarshalBinary() ([]byte, error) { b := new(bytes.Buffer) - err := binary.Write(b, multiByteSerializationOrder, t) + err := binary.Write(b, MultiByteSerializationOrder, t) return b.Bytes(), err } @@ -121,7 +121,7 @@ func (t TreeDepth) MarshalText() ([]byte, error) { // back to its structured form func (t *TreeDepth) UnmarshalBinary(b []byte) error { buf := bytes.NewBuffer(b) - return binary.Read(buf, multiByteSerializationOrder, t) + return binary.Read(buf, MultiByteSerializationOrder, t) } func (t *TreeDepth) BytesConsumed() int { @@ -190,7 +190,7 @@ const sizeofVersion = 2 // MarshalBinary converts the Version into its binary representation func (v Version) MarshalBinary() ([]byte, error) { b 
:= new(bytes.Buffer) - err := binary.Write(b, multiByteSerializationOrder, v) + err := binary.Write(b, MultiByteSerializationOrder, v) return b.Bytes(), err } @@ -202,7 +202,7 @@ func (v Version) MarshalText() ([]byte, error) { // back to its structured form func (v *Version) UnmarshalBinary(b []byte) error { buf := bytes.NewBuffer(b) - return binary.Read(buf, multiByteSerializationOrder, v) + return binary.Read(buf, MultiByteSerializationOrder, v) } func (v *Version) BytesConsumed() int { @@ -234,7 +234,7 @@ func (t Timestamp) Time() time.Time { // MarshalBinary converts the Timestamp into its binary representation func (v Timestamp) MarshalBinary() ([]byte, error) { b := new(bytes.Buffer) - err := binary.Write(b, multiByteSerializationOrder, v) + err := binary.Write(b, MultiByteSerializationOrder, v) return b.Bytes(), err } @@ -246,7 +246,7 @@ func (v Timestamp) MarshalText() ([]byte, error) { // back to its structured form func (v *Timestamp) UnmarshalBinary(b []byte) error { buf := bytes.NewBuffer(b) - return binary.Read(buf, multiByteSerializationOrder, v) + return binary.Read(buf, MultiByteSerializationOrder, v) } func (v *Timestamp) BytesConsumed() int { @@ -306,6 +306,10 @@ func (t *NodeType) Equals(t2 *NodeType) bool { return ((*genericType)(t)).Equals((*genericType)(t2)) } +func (t NodeType) String() string { + return NodeTypeNames[t] +} + type HashType genericType const ( diff --git a/go.mod b/go.mod index b5fada4..d865fa7 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,10 @@ module git.sr.ht/~whereswaldon/forest-go -require golang.org/x/crypto v0.0.0-20201016220609-9e8e0b390897 +require ( + github.com/boltdb/bolt v1.3.1 + golang.org/x/crypto v0.0.0-20210220033148-5ea612d1eb83 + golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5 // indirect +) replace golang.org/x/crypto => github.com/ProtonMail/crypto v0.0.0-20201022141144-3fe6b6992c0f diff --git a/go.sum b/go.sum index 30bdd5c..334aae8 100644 --- a/go.sum +++ b/go.sum @@ -1,7 +1,9 @@ 
github.com/ProtonMail/crypto v0.0.0-20201022141144-3fe6b6992c0f h1:CrqdTsoF7teMqQok+iHUx3yjYJfkpDuU7y/nIxRJ2rY= github.com/ProtonMail/crypto v0.0.0-20201022141144-3fe6b6992c0f/go.mod h1:Pxr7w4gA2ikI4sWyYwEffm+oew1WAJHzG1SiDpQMkrI= +github.com/boltdb/bolt v1.3.1 h1:JQmyP4ZBrce+ZQu0dY660FMfatumYDLun9hBCUVIkF4= +github.com/boltdb/bolt v1.3.1/go.mod h1:clJnj/oiGkjum5o1McbSZDSLxVThjynRyGBgiAx27Ps= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/sys v0.0.0-20190412213103-97732733099d h1:+R4KGOnez64A81RvjARKc4UT5/tI9ujCIVX+P5KiHuI= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= +golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5 h1:LfCXLvNmTYH9kEmVgqbnsWfruoXZIrh4YBgqVHtDvw0= +golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= diff --git a/grove/child-cache.go b/grove/child-cache.go index caf9c07..792dd4d 100644 --- a/grove/child-cache.go +++ b/grove/child-cache.go @@ -35,6 +35,9 @@ func (c *ChildCache) Get(parent *fields.QualifiedHash) ([]*fields.QualifiedHash, if !inMap { return nil, false } + if len(submap) == 0 { + return nil, true + } out := make([]*fields.QualifiedHash, 0, len(submap)) for _, child := range submap { out = append(out, child) diff --git a/grove/grove.go b/grove/grove.go index 70151b3..3b39fc3 100644 --- a/grove/grove.go +++ b/grove/grove.go @@ -306,7 +306,7 @@ func (g *Grove) Children(id *fields.QualifiedHash) ([]*fields.QualifiedHash, err } children, inCache = g.ChildCache.Get(id) if !inCache { - return []*fields.QualifiedHash{}, nil + return nil, nil } return children, nil @@ -500,7 +500,16 @@ func (g *Grove) GetReply(communityID, conversationID, replyID *fields.QualifiedH // of the functionality that can be 
implemented simply. However, it is // implementable, and should be done as soon as is feasible. func (g *Grove) CopyInto(other forest.Store) error { - return fmt.Errorf("method CopyInto() is not currently implemented on Grove") + nodes, err := g.allNodes() + if err != nil { + return err + } + for _, n := range nodes { + if err := other.Add(n); err != nil { + return fmt.Errorf("copying node: %w", err) + } + } + return nil } // RemoveSubtree removes the subtree rooted at the node diff --git a/orchard/cursor.go b/orchard/cursor.go new file mode 100644 index 0000000..2617037 --- /dev/null +++ b/orchard/cursor.go @@ -0,0 +1,87 @@ +package orchard + +import "github.com/boltdb/bolt" + +// indexCursor uses an index bucket to iterate over the parent bucket. +// Collided index entries are flattened in the order they were inserted. +type indexCursor struct { + // Parent bucket contains the real values being indexed. + Parent *bolt.Bucket + // Index bucket contains the index values, which are some arbitrary key + // mapped to entries in the Parent bucket. + Index *bolt.Bucket + // Reverse iterates backward, if true. + Reverse bool + + // cursor holds the current state of the bucket cursor. + cursor *bolt.Cursor + // collisions holds the current state of the collision bucket cursor. + // If collisions is not nil, we are iterating over collided entries for a + // given key. + collisions *bolt.Cursor +} + +// First returns the first k,v pair. +// This method initializes the underlying cursor and must be called before calls +// to `indexCursor.Next`. 
+func (c *indexCursor) First() ([]byte, []byte) { + if c.Index == nil { + return nil, nil + } + c.cursor = c.Index.Cursor() + k, v := c.first(c.cursor) + if k == nil { + return nil, nil + } + if v != nil { + id := v + return id, c.Parent.Get(id) + } + if b := c.Index.Bucket(k); b != nil { + c.collisions = b.Cursor() + _, id := c.first(c.collisions) + return id, c.Parent.Get(id) + } + return nil, nil +} + +// Next returns the next k,v pair until k is nil. +func (c *indexCursor) Next() ([]byte, []byte) { + var cursor = c.cursor + if c.collisions != nil { + cursor = c.collisions + } + k, v := c.next(cursor) + if k == nil { + if c.collisions != nil { + c.collisions = nil + _, id := c.next(c.cursor) + return id, c.Parent.Get(id) + } + return nil, nil + } + if v != nil { + id := v + return id, c.Parent.Get(id) + } + if b := c.Index.Bucket(k); b != nil { + c.collisions = b.Cursor() + _, id := c.first(c.collisions) + return id, c.Parent.Get(id) + } + return nil, nil +} + +func (c *indexCursor) next(cursor *bolt.Cursor) ([]byte, []byte) { + if c.Reverse { + return cursor.Prev() + } + return cursor.Next() +} + +func (c *indexCursor) first(cursor *bolt.Cursor) ([]byte, []byte) { + if c.Reverse { + return cursor.Last() + } + return cursor.First() +} diff --git a/orchard/index.go b/orchard/index.go new file mode 100644 index 0000000..3cb03bd --- /dev/null +++ b/orchard/index.go @@ -0,0 +1,58 @@ +package orchard + +import ( + "git.sr.ht/~whereswaldon/forest-go/fields" + "github.com/boltdb/bolt" +) + +// index maps keys to node IDs for accelerated queries. +// On collision, all values for the collided key are placed into a bucket. +// Must be used inside a transaction. +type index struct { + *bolt.Bucket +} + +// Put a kv pair into the index. +// If an entry for k exists, all values are inserted into a bucket for that +// key. +func (idx index) Put(k []byte, v []byte) error { + nextID := func(b *bolt.Bucket) ([]byte, error) { + // Generate a unique ID for this sub-entry. 
+ // ID is not used other than by the database for purposes of + // sorting and iterating. + kint, err := b.NextSequence() + if err != nil { + return nil, err + } + k := make([]byte, 8) + fields.MultiByteSerializationOrder.PutUint64(k, kint) + return k, nil + } + if collision := idx.Bucket.Get(k); collision != nil { + // Move any existing value into a collision bucket. + if err := idx.Bucket.Delete(k); err != nil { + return err + } + collisions, err := idx.Bucket.CreateBucketIfNotExists(k) + if err != nil { + return err + } + k, err := nextID(collisions) + if err != nil { + return err + } + if err := collisions.Put(k, collision); err != nil { + return err + } + } + if collisions := idx.Bucket.Bucket(k); collisions != nil { + // Place the new value into the collisions bucket. + k, err := nextID(collisions) + if err != nil { + return err + } + return collisions.Put(k, v) + } + // No collision, place value into the root bucket. + return idx.Bucket.Put(k, v) +} diff --git a/orchard/orchard.go b/orchard/orchard.go new file mode 100644 index 0000000..40edb84 --- /dev/null +++ b/orchard/orchard.go @@ -0,0 +1,428 @@ +// Package orchard implements a boltdb backed on-disk node store, satisfying the +// forest.Store interface. +// +// This database is a single file that serializes nodes into buckets. +// Various indexes are used to accelerate important queries. +// +// As a result of using boltdb, Orchard prefers read heavy workloads. +package orchard + +import ( + "fmt" + "io" + + "git.sr.ht/~whereswaldon/forest-go" + "git.sr.ht/~whereswaldon/forest-go/fields" + "git.sr.ht/~whereswaldon/forest-go/store" + "github.com/boltdb/bolt" +) + +// Bucket contains the name of a bucket in bytes. +type Bucket []byte + +func (b Bucket) String() string { + return string(b) +} + +var ( + // Buckets are the storage primitive used by bolt, that contain a sorted + // list of key-value pairs. Each bucket is homogeneous. 
+ BucketReply Bucket = Bucket("Reply") + BucketIdentity Bucket = Bucket("Identity") + BucketCommunity Bucket = Bucket("Community") + + // Indexes are used to speed up queries. + IndexAge Bucket = Bucket("Age") + IndexType Bucket = Bucket("Type") + IndexChildren Bucket = Bucket("Children") + + Buckets []Bucket = []Bucket{ + BucketReply, + BucketIdentity, + BucketCommunity, + } + + Indexes []Bucket = []Bucket{ + IndexAge, + IndexType, + IndexChildren, + } +) + +// Orchard is a database-backed node store for `forest.Node`. +// Nodes are persisted as schema entities and can be queried as such. +type Orchard struct { + *bolt.DB + ReadCache *store.MemoryStore +} + +// Open a database file at the given path using the standard OS filesystem. +func Open(path string) (*Orchard, error) { + db, err := bolt.Open(path, 0660, nil) + if err != nil { + return nil, fmt.Errorf("opening database file: %w", err) + } + return Using(db) +} + +// Using allocates an Orchard using the provided database handle. +func Using(db *bolt.DB) (*Orchard, error) { + if err := db.Update(func(tx *bolt.Tx) error { + for _, b := range append(Buckets, Indexes...) { + _, err := tx.CreateBucketIfNotExists(b) + if err != nil { + return fmt.Errorf("init %s: %w", b, err) + } + } + return nil + }); err != nil { + return nil, fmt.Errorf("init database: %w", err) + } + return &Orchard{ + DB: db, + ReadCache: store.NewMemoryStore(), + }, nil +} + +// Add inserts the node into the orchard. If the given node is already in the +// orchard, Add will do nothing. It is not an error to insert a node more than +// once. 
+func (o *Orchard) Add(node forest.Node) error { + if _, ok, err := o.Get(node.ID()); err != nil { + return fmt.Errorf("checking existence of node: %w", err) + } else if ok { + return nil + } + id, err := node.ID().MarshalText() + if err != nil { + return fmt.Errorf("serializing node ID: %w", err) + } + v, err := node.MarshalBinary() + if err != nil { + return fmt.Errorf("serializing node: %w", err) + } + typeIndex := func(tx *bolt.Tx, nt fields.NodeType) error { + v, err := nt.MarshalBinary() + if err != nil { + return err + } + return index{Bucket: tx.Bucket(IndexType)}.Put(id, v) + } + ageIndex := func(tx *bolt.Tx, nt fields.NodeType, ts fields.Timestamp) error { + k, err := ts.MarshalBinary() + if err != nil { + return err + } + b, err := tx.Bucket(IndexAge).CreateBucketIfNotExists(bucketFromNodeType(nt)) + if err != nil { + return err + } + return index{Bucket: b}.Put(k, id) + } + childIndex := func(tx *bolt.Tx) error { + k, err := node.ParentID().MarshalText() + if err != nil { + return err + } + return index{Bucket: tx.Bucket(IndexChildren)}.Put(k, id) + } + return o.DB.Update(func(tx *bolt.Tx) error { + switch n := node.(type) { + case *forest.Reply: + if err := tx.Bucket(BucketReply).Put(id, v); err != nil { + return fmt.Errorf("updating bucket: %w", err) + } + if err := typeIndex(tx, n.Type); err != nil { + return fmt.Errorf("updating Type index: %w", err) + } + if err := ageIndex(tx, n.Type, n.Created); err != nil { + return fmt.Errorf("updating Age index: %w", err) + } + + case *forest.Identity: + if err := tx.Bucket(BucketIdentity).Put(id, v); err != nil { + return fmt.Errorf("updating bucket: %w", err) + } + if err := typeIndex(tx, n.Type); err != nil { + return fmt.Errorf("updating Type index: %w", err) + } + if err := ageIndex(tx, n.Type, n.Created); err != nil { + return fmt.Errorf("updating Age index: %w", err) + } + case *forest.Community: + if err := tx.Bucket(BucketCommunity).Put(id, v); err != nil { + return fmt.Errorf("updating bucket: %w", 
err) + } + if err := typeIndex(tx, n.Type); err != nil { + return fmt.Errorf("updating Type index: %w", err) + } + if err := ageIndex(tx, n.Type, n.Created); err != nil { + return fmt.Errorf("updating Age index: %w", err) + } + } + return childIndex(tx) + }) +} + +// Get searches for a node with the given id. +// Present indicates whether the node exists, err indicates a failure to load it. +func (o *Orchard) Get(nodeID *fields.QualifiedHash) (node forest.Node, present bool, err error) { + if n, ok, _ := o.ReadCache.Get(nodeID); ok { + return n, ok, nil + } + defer func() { + if err == nil && node != nil { + _ = o.ReadCache.Add(node) + } + }() + var ( + nt fields.NodeType + u union + ) + id, err := nodeID.MarshalText() + if err != nil { + return nil, false, fmt.Errorf("serializing node ID: %w", err) + } + return node, present, o.DB.View(func(tx *bolt.Tx) error { + v := tx.Bucket(IndexType).Get(id) + if v == nil { + node = nil + present = false + err = nil + return nil + } + present = true + if err := nt.UnmarshalBinary(v); err != nil && err != io.EOF { + return fmt.Errorf("loading node type: %w", err) + } + if err := u.UnmarshalBinary(tx.Bucket(bucketFromNodeType(nt)).Get(id)); err != nil { + return fmt.Errorf("deserializing node: %w", err) + } + node = u.Node() + return nil + }) +} + +func (o *Orchard) GetIdentity(id *fields.QualifiedHash) (forest.Node, bool, error) { + return o.Get(id) +} + +func (o *Orchard) GetCommunity(id *fields.QualifiedHash) (forest.Node, bool, error) { + return o.Get(id) +} + +func (o *Orchard) GetConversation(communityID, id *fields.QualifiedHash) (forest.Node, bool, error) { + return o.Get(id) +} + +func (o *Orchard) GetReply(communityID, conversationID, id *fields.QualifiedHash) (forest.Node, bool, error) { + return o.Get(id) +} + +// Children returns the IDs of all known child nodes of the specified ID. 
+func (o *Orchard) Children(parent *fields.QualifiedHash) (children []*fields.QualifiedHash, err error) { + k, err := parent.MarshalText() + if err != nil { + return nil, fmt.Errorf("serializing parent node: %w", err) + } + return children, o.DB.View(func(tx *bolt.Tx) error { + if child := tx.Bucket(IndexChildren).Get(k); child != nil { + var id fields.QualifiedHash + if err := id.UnmarshalText(child); err != nil { + return fmt.Errorf("deserializing node ID: %w", err) + } + children = append(children, &id) + return nil + } + if b := tx.Bucket(IndexChildren).Bucket(k); b != nil { + c := b.Cursor() + for k, v := c.First(); k != nil; k, v = c.Next() { + var id fields.QualifiedHash + if err := id.UnmarshalText(v); err != nil { + return fmt.Errorf("deserializing node ID: %w", err) + } + children = append(children, &id) + } + } + return nil + }) +} + +// Recent returns a slice of nodes of a given type ordered by recency, youngest +// first. +// +// NOTE: this function may return both a valid slice of nodes and an error +// in the case that some nodes failed to be unmarshaled from disk, but others +// were successful. Calling code should always check whether the node list is +// empty before throwing it away. 
+func (o *Orchard) Recent(nt fields.NodeType, n int) (nodes []forest.Node, err error) { + var ( + data = make([][]byte, n) + b = bucketFromNodeType(nt) + ii = 0 + errors Errors + ) + if err := o.DB.View(func(tx *bolt.Tx) error { + c := indexCursor{ + Parent: tx.Bucket(b), + Index: tx.Bucket(IndexAge).Bucket(b), + Reverse: true, + } + for k, v := c.First(); k != nil && ii < n; k, v = c.Next() { + data[ii] = make([]byte, len(v)) + copy(data[ii], v) + ii++ + } + return nil + }); err != nil { + return nodes, fmt.Errorf("copying out node data: %w", err) + } + for ii := range data { + n, err := forest.UnmarshalBinaryNode(data[ii]) + if err == io.EOF { + break + } + if err != nil { + errors = append(errors, fmt.Errorf("deserializing node: %w", err)) + continue + } + nodes = append(nodes, n) + } + if len(errors) > 0 { + return nodes, errors + } + return nodes, nil +} + +// CopyInto copies all nodes from the store into the provided store. +func (o *Orchard) CopyInto(other forest.Store) error { + var data [][]byte + if err := o.DB.View(func(tx *bolt.Tx) error { + for _, b := range Buckets { + c := tx.Bucket(b).Cursor() + for k, v := c.First(); k != nil; k, v = c.Next() { + by := make([]byte, len(v)) + copy(by, v) + data = append(data, by) + } + } + return nil + }); err != nil { + return fmt.Errorf("copying out node data: %w", err) + } + for _, by := range data { + n, err := forest.UnmarshalBinaryNode(by) + if err != nil { + return fmt.Errorf("deserializing node: %w", err) + } + if err := other.Add(n); err != nil { + return fmt.Errorf("copying node: %w", err) + } + } + return nil +} + +// RemoveSubtree removes the subtree rooted at the node with the provided ID +// from the orchard. 
+func (o *Orchard) RemoveSubtree(id *fields.QualifiedHash) error { + node, ok, err := o.Get(id) + if err != nil { + return err + } + if !ok { + return nil + } + children, err := o.Children(id) + if err != nil { + return err + } + for _, child := range children { + if err := o.RemoveSubtree(child); err != nil { + return err + } + } + return o.delete(node) +} + +func (o *Orchard) delete(n forest.Node) error { + id, err := n.ID().MarshalBinary() + if err != nil { + return fmt.Errorf("serializing node ID: %w", err) + } + return o.DB.Update(func(tx *bolt.Tx) error { + return tx.Bucket(bucketFromNode(n)).Delete(id) + }) +} + +// union lays out memory big enough to fit all three forest node types. +// +// Note(jfm): there may be a better way of allocating hetrogeneous data. +type union struct { + forest.Reply + forest.Identity + forest.Community +} + +func (u *union) Node() forest.Node { + if len(u.Reply.Identifier) > 0 { + return &u.Reply + } + if len(u.Identity.Identifier) > 0 { + return &u.Identity + } + if len(u.Community.Identifier) > 0 { + return &u.Community + } + return nil +} + +func (u *union) UnmarshalBinary(b []byte) error { + buf := make([]byte, len(b)) + copy(buf, b) + n, err := forest.UnmarshalBinaryNode(buf) + if err != nil { + return err + } + switch n := n.(type) { + case *forest.Reply: + u.Reply = *n + case *forest.Identity: + u.Identity = *n + case *forest.Community: + u.Community = *n + } + return nil +} + +// bucketFromNodeType returns the corresponding bucket for a node type. +func bucketFromNodeType(nt fields.NodeType) Bucket { + switch nt { + case fields.NodeTypeReply: + return BucketReply + case fields.NodeTypeIdentity: + return BucketIdentity + case fields.NodeTypeCommunity: + return BucketCommunity + } + return nil +} + +// bucketFromNode returns the corresponding bucket for a node. 
+func bucketFromNode(n forest.Node) Bucket { + switch n.(type) { + case *forest.Reply: + return BucketReply + case *forest.Identity: + return BucketIdentity + case *forest.Community: + return BucketCommunity + } + return nil +} + +// Errors wraps multiple errors into a single return value. +type Errors []error + +func (e Errors) Error() string { + return fmt.Sprintf("%v", []error(e)) +} diff --git a/store/store_test.go b/store/store_test.go index 17eeddd..0052378 100644 --- a/store/store_test.go +++ b/store/store_test.go @@ -1,10 +1,19 @@ package store_test import ( + "errors" + "fmt" + "math/rand" + "os" + "path/filepath" + "reflect" "testing" + "time" forest "git.sr.ht/~whereswaldon/forest-go" "git.sr.ht/~whereswaldon/forest-go/fields" + "git.sr.ht/~whereswaldon/forest-go/grove" + "git.sr.ht/~whereswaldon/forest-go/orchard" "git.sr.ht/~whereswaldon/forest-go/store" "git.sr.ht/~whereswaldon/forest-go/testutil" ) @@ -14,6 +23,179 @@ func TestMemoryStore(t *testing.T) { testStandardStoreInterface(t, s, "MemoryStore") } +func TestGroveStore(t *testing.T) { + path := filepath.Join(os.TempDir(), "grove") + if err := func() error { + if err := os.RemoveAll(path); err != nil { + return err + } + if err := os.MkdirAll(path, 0777); err != nil { + return err + } + return nil + }(); err != nil { + t.Skipf("preparing filesystem: %v", err) + } + g, err := grove.New(path) + if err != nil { + t.Skipf("preparing filesystem: %v", err) + } + testStandardStoreInterface(t, g, "Grove") +} + +// TestOrchardStore exercises the Orchard against the Store interface. 
+func TestOrchardStore(t *testing.T) { + setup := func() (o *orchard.Orchard, g *grove.Grove, cleanup func()) { + db := filepath.Join(os.TempDir(), "orchard", "orchard_test.db") + _ = os.Remove(db) + if err := os.MkdirAll(filepath.Dir(db), 0777); err != nil { + t.Skipf("preparing database file: %v", err) + } + s, err := orchard.Open(db) + if err != nil { + t.Skipf("opening database: %v", err) + } + // Setup mock grove to test against - grove is used as the expected + // behaviour. + var ( + path = filepath.Join(os.TempDir(), "mock_grove") + generate bool + ) + if _, err := os.Stat(path); os.IsNotExist(err) { + generate = true + } + if err := os.MkdirAll(path, 0777); err != nil { + t.Skipf("preparing path to mock grove: %v", err) + } + g, err = grove.New(path) + if err != nil { + t.Skipf("setting up mock grove: %v", err) + } + if generate { + GenerateCommunity(10, 10, g) + } + return s, g, func() { + if err := s.DB.Close(); err != nil { + t.Logf("closing database: %v", err) + } + } + } + var ( + o *orchard.Orchard + g *grove.Grove + cleanup func() + ) + o, g, cleanup = setup() + testOrchard(t, o, g, "Orchard") + cleanup() + o, _, cleanup = setup() + testStandardStoreInterface(t, o, "Orchard") + cleanup() +} + +// testOrchard tests against a mock store pre-populated with nodes. +// The nodes must be copied over, be retrievable, and queried correctly. +// Memory store is used as a convenience to iterate over all nodes in memory. 
+func testOrchard(t *testing.T, s, mock forest.Store, prefix string) { + var mem = store.NewMemoryStore() + copier, ok := mock.(forest.Copiable) + if !ok { + t.Fatalf("mocked store must implement forest.Copiable") + } + if err := copier.CopyInto(s); err != nil { + t.Fatalf("populating store with mock nodes: %v", err) + } + if err := copier.CopyInto(mem); err != nil { + t.Fatalf("populating mem store with mock nodes: %v", err) + } + for _, n := range mem.Items { + got, ok, err := s.Get(n.ID()) + if err != nil { + t.Errorf("%s: getting node from store: %v", prefix, err) + } + if !ok { + t.Errorf("%s: node not gettable from store: %T %s", prefix, n, n.ID()) + } + if ok && got == nil { + t.Errorf("%s: expected non-nil node: %v", prefix, got) + } + if got != nil && !got.Equals(n) { + t.Errorf("%s: got node not equal to original node", prefix) + } + } + for _, parent := range mem.Items { + got, err := s.Children(parent.ID()) + if err != nil { + t.Errorf("%s: retrieving children: %v", prefix, err) + } + want, err := mem.Children(parent.ID()) + if err != nil { + t.Errorf("%s: computing children: %v", prefix, err) + } + for _, w := range want { + var contains bool + for _, g := range got { + if w.Equals(g) { + contains = true + break + } + } + if !contains { + n, _, err := mem.Get(w) + if err != nil { + t.Errorf("%s: getting missing node: %v", prefix, err) + } else { + t.Errorf("%s: missing: %T %s of parent %T %s", prefix, n, n.ID(), parent, parent.ID()) + } + } + } + for _, g := range got { + var contains bool + for _, w := range want { + if g.Equals(w) { + contains = true + break + } + } + if !contains { + n, _, err := s.Get(g) + if err != nil { + t.Errorf("%s: getting extraneous node: %v", prefix, err) + } else { + t.Errorf("%s: extraneous: %T %s to parent %T %s", prefix, n, n.ID(), parent, parent.ID()) + } + } + } + } + for _, nt := range []fields.NodeType{ + fields.NodeTypeReply, + fields.NodeTypeIdentity, + fields.NodeTypeCommunity, + } { + const maxquery = 1000 + for 
ii := 0; ii < maxquery; ii += (ii + 1) { + query := ii + got, err := s.Recent(nt, query) + if err != nil { + t.Fatalf("%s: unexpected error getting recent nodes: %v", prefix, err) + } + if !isHomogeneous(nt, got) { + t.Fatalf("%s Recent(%s, %d) has hetrogeneous nodes", prefix, nt, query) + } + want, err := mock.Recent(nt, query) + if err != nil { + t.Fatalf("%s: unexpected error getting recent nodes: %v", prefix, err) + } + if !isHomogeneous(nt, want) { + t.Fatalf("%s Recent(%s, %d) has hetrogeneous nodes", prefix, nt, query) + } + if !reflect.DeepEqual(infos(want), infos(got)) { + t.Fatalf("%s: failed query Recent(%s, %d)", prefix, nt, query) + } + } + } +} + func testStandardStoreInterface(t *testing.T, s forest.Store, storeImplName string) { // create three test nodes, one of each type identity, _, community, reply := testutil.MakeReplyOrSkip(t) @@ -40,7 +222,7 @@ func testStandardStoreInterface(t *testing.T, s forest.Store, storeImplName stri } else if err != nil { t.Errorf("Empty %s Get() should not err with %s", storeImplName, err) } else if node != nil { - t.Errorf("Empty %s Get() should return none-nil node %v", storeImplName, node) + t.Errorf("Empty %s Get() should return nil node, got %s", storeImplName, node) } } } @@ -60,6 +242,9 @@ func testStandardStoreInterface(t *testing.T, s forest.Store, storeImplName stri {identity, []string{"get", "identity"}}, {community, []string{"get", "community"}}, {reply, []string{"get", "conversation", "reply"}}, + {identity, []string{"get"}}, + {community, []string{"get"}}, + {reply, []string{"get"}}, } // ensure all getters work for each node @@ -79,21 +264,21 @@ func testStandardStoreInterface(t *testing.T, s forest.Store, storeImplName stri // map nodes to the children that they ought to have within the store nodesToChildren := []struct { forest.Node - children []*fields.QualifiedHash + children []forest.Node }{ - {identity, []*fields.QualifiedHash{}}, - {community, []*fields.QualifiedHash{reply.ID()}}, - {reply, 
[]*fields.QualifiedHash{}}, + {identity, []forest.Node{}}, + {community, []forest.Node{reply}}, + {reply, []forest.Node{}}, } // check each node has its proper children for _, childConfig := range nodesToChildren { if children, err := s.Children(childConfig.ID()); err != nil { - t.Errorf("%s should not error fetching children of %v", storeImplName, childConfig.ID()) + t.Errorf("%s should not error fetching children of %v: %q", storeImplName, childConfig.ID().Blob, err) } else { for _, child := range childConfig.children { - if !containsID(children, child) { - t.Errorf("%s should have %v as a child of %v", storeImplName, child, childConfig.ID()) + if !containsID(children, child.ID()) { + t.Errorf("%s should have %T %v as a child of %T %v (got %v)", storeImplName, child, child.ID(), childConfig.Node, childConfig.ID(), children) } } } @@ -122,7 +307,7 @@ func testStandardStoreInterface(t *testing.T, s forest.Store, storeImplName stri if err != nil { t.Errorf("Recent failed on valid input: %v", err) } else if len(recentNodes) < 1 { - t.Errorf("Recent on store with data returned too few results") + t.Errorf("Recent on store with data returned too few results: want %d, got %d", 1, len(recentNodes)) } else if !recentNodes[0].Equals(run.atZero) { t.Errorf("Expected most recent node to be the newly created one") } @@ -130,7 +315,7 @@ func testStandardStoreInterface(t *testing.T, s forest.Store, storeImplName stri if err != nil { t.Errorf("Recent failed on valid input: %v", err) } else if len(recentNodes) < 2 { - t.Errorf("Recent on store with data returned too few results") + t.Errorf("Recent on store with data returned too few results: want %d, got %d", 2, len(recentNodes)) } else if !recentNodes[0].Equals(run.atZero) { t.Errorf("Expected most recent node to be the newly created one") } else if !recentNodes[1].Equals(run.atOne) { @@ -140,20 +325,11 @@ func testStandardStoreInterface(t *testing.T, s forest.Store, storeImplName stri if err != nil { t.Errorf("Recent failed on 
valid input: %v", err) } else if len(recentNodes) > 2 { - t.Errorf("Recent on store with only two matching nodes returned more than 2 results") + t.Errorf("Recent (%v) on store with only two matching nodes returned more than 2 results: %#v", run.NodeType, recentNodes) } } } -func containsID(ids []*fields.QualifiedHash, id *fields.QualifiedHash) bool { - for _, current := range ids { - if current.Equals(id) { - return true - } - } - return false -} - func TestCacheStore(t *testing.T) { s1 := store.NewMemoryStore() s2 := store.NewMemoryStore() @@ -228,3 +404,282 @@ func TestCacheStoreUpPropagation(t *testing.T) { } } } + +func BenchmarkOrchard(b *testing.B) { + benchmarkStore(b, loadStore(useOrchard)) +} + +func BenchmarkGrove(b *testing.B) { + benchmarkStore(b, loadStore(useGrove)) +} + +func BenchmarkMemory(b *testing.B) { + benchmarkStore(b, loadStore(useMemory)) +} + +func benchmarkStore(b *testing.B, load Load) { + benches := []struct { + Name string + Size Size + }{ + {"nano", Size{Identities: 5, Replies: 10}}, + {"micro", Size{Identities: 5, Replies: 20}}, + {"small", Size{Identities: 10, Replies: 40}}, + {"nominal", Size{Identities: 20, Replies: 80}}, + {"medium", Size{Identities: 40, Replies: 160}}, + {"large", Size{Identities: 80, Replies: 320}}, + {"uber", Size{Identities: 160, Replies: 640}}, + } + for _, bench := range benches { + s, err := load(bench.Size) + if err != nil { + b.Skipf("loading store: %v", err) + } + b.Run(fmt.Sprintf("Recent/%s", bench.Name), func(b *testing.B) { + var ( + recent []forest.Node + err error + ) + b.ReportAllocs() + b.ResetTimer() + for ii := 0; ii < b.N; ii++ { + recent, err = s.Recent(fields.NodeTypeReply, 100) + if err != nil { + b.Errorf("loading %d recent nodes: %v", 100, err) + } + _ = recent + _ = err + } + }) + b.Run(fmt.Sprintf("Get/%s", bench.Name), func(b *testing.B) { + var ( + n forest.Node + ok bool + err error + ) + nodes, err := s.Recent(fields.NodeTypeReply, 100) + if err != nil { + b.Skipf("loading recent 
replies: %v", err) + } + b.ReportAllocs() + b.ResetTimer() + for ii := 0; ii < b.N; ii++ { + n, ok, err = s.Get(nodes[rand.Intn(len(nodes)-1)].ID()) + if err != nil { + b.Errorf("loading %d recent nodes: %v", 100, err) + } + _ = n + _ = ok + _ = err + } + }) + b.Run(fmt.Sprintf("Children/%s", bench.Name), func(b *testing.B) { + var ( + children []*fields.QualifiedHash + err error + ) + nodes, err := s.Recent(fields.NodeTypeReply, 100) + if err != nil { + b.Skipf("loading recent replies: %v", err) + } + b.ReportAllocs() + b.ResetTimer() + for ii := 0; ii < b.N; ii++ { + children, err = s.Children(nodes[rand.Intn(len(nodes)-1)].ID()) + if err != nil { + b.Errorf("loading %d child nodes: %v", 100, err) + } + _ = children + _ = err + } + }) + b.Run(fmt.Sprintf("LeavesOf/%s", bench.Name), func(b *testing.B) { + var ( + leaves []*fields.QualifiedHash + err error + archive = store.NewArchive(s) + ) + nodes, err := archive.Recent(fields.NodeTypeReply, 100) + if err != nil { + b.Skipf("loading recent replies: %v", err) + } + b.ReportAllocs() + b.ResetTimer() + for ii := 0; ii < b.N; ii++ { + leaves, err = archive.LeavesOf(nodes[rand.Intn(len(nodes)-1)].ID()) + if err != nil { + b.Errorf("loading %d leaf nodes: %v", 100, err) + } + _ = leaves + _ = err + } + }) + if closer, ok := s.(interface{ Close() error }); ok { + if err := closer.Close(); err != nil { + b.Logf("closing: %v", err) + } + } + } +} + +// GenerateCommunity creates a community with nIdentities and nReplies per +// identity and writes the nodes to the provided store. 
+func GenerateCommunity(nIdentities, nReplies int, s forest.Store) { + var ( + community = testutil.MockCommunity(forest.As(testutil.MockIdentity()), "test-community") + conversations []*forest.Reply + builders []*forest.Builder + ) + if err := s.Add(community); err != nil { + panic(fmt.Errorf("adding node to memory store: %w", err)) + } + for ii := 0; ii < nIdentities; ii++ { + b := forest.As(testutil.MockIdentity()) + // Random timestamp up to the max value of an int. + b.Timer = func() time.Time { + return fields.Timestamp(rand.Intn(int(^uint(0) >> 1))).Time() + } + builders = append(builders, b) + if err := s.Add(b.User); err != nil { + panic(fmt.Errorf("adding node to memory store: %w", err)) + } + reply := testutil.MockReply(b, community) + if err := s.Add(reply); err != nil { + panic(fmt.Errorf("adding node to memory store: %w", err)) + } + conversations = append(conversations, reply) + } + // Reply to conversations up to nReplies per identity. + for _, b := range builders { + for ii := 0; ii < nReplies; ii++ { + if err := s.Add(testutil.MockReply(b, conversations[rand.Intn(len(conversations))])); err != nil { + panic(fmt.Errorf("adding node to memory store: %w", err)) + } + } + } +} + +// info is a helper that gets printed without noise during `assert.ElementsMatch`. 
+type info struct { + ID string + Type string + Created fields.Timestamp +} + +func infos(nodes []forest.Node) (idlist []info) { + for _, n := range nodes { + var t fields.NodeType + switch n := n.(type) { + case *forest.Reply: + t = n.Type + case *forest.Identity: + t = n.Type + case *forest.Community: + t = n.Type + } + idlist = append(idlist, info{ + ID: n.ID().String(), + Type: t.String(), + Created: fields.TimestampFrom(n.CreatedAt()), + }) + } + return idlist +} + +func isHomogeneous(nt fields.NodeType, nodes []forest.Node) bool { + for _, n := range nodes { + switch n := n.(type) { + case *forest.Reply: + if n.Type != nt { + return false + } + case *forest.Identity: + if n.Type != nt { + return false + } + case *forest.Community: + if n.Type != nt { + return false + } + } + } + return true +} + +func containsID(ids []*fields.QualifiedHash, id *fields.QualifiedHash) bool { + for _, current := range ids { + if current.Equals(id) { + return true + } + } + return false +} + +// Size specifies the size of a community in terms of identities and replies per +// identity. +// Functions as a key for loading a store. +type Size struct { + Identities int + Replies int +} + +// Load can load a store. +type Load = func(sz Size) (forest.Store, error) + +// Init will return a store for the given key. +// Bool indicates whether the store been initialised with a community. +type Init = func(key string) (s forest.Store, ok bool, err error) + +// loadStore loads a store using the initializer function. +// +// This indirection allows us to implement a lazy loading of stores, where the +// community will be generated if it doesn't exist, and re-used if it does. +// +// This is because generating communities is expensive due to the cryptography +// involved. 
+func loadStore(init Init) Load { + return func(sz Size) (forest.Store, error) { + s, ok, err := init(fmt.Sprintf("%d_%d_", sz.Identities, sz.Replies)) + if err != nil { + return nil, err + } + if !ok { + GenerateCommunity(sz.Identities, sz.Replies, s) + } + return s, nil + } +} + +// useOrchard will return or create an orchard with the given prefix. +func useOrchard(prefix string) (s forest.Store, exists bool, err error) { + path := filepath.Join(os.TempDir(), "benchmark_cache", fmt.Sprintf("%sorchard.db", prefix)) + if _, err := os.Stat(path); err == nil { + exists = true + } + o, err := orchard.Open(path) + if err != nil { + return nil, false, err + } + return o, exists, nil +} + +// useGrove will return or create a grove with the given prefix. +func useGrove(prefix string) (s forest.Store, exists bool, err error) { + path := filepath.Join(os.TempDir(), "benchmark_cache", fmt.Sprintf("%sgrove", prefix)) + if _, err := os.Stat(path); err == nil { + exists = true + } + if err := os.MkdirAll(path, 0777); err != nil && !errors.Is(err, os.ErrExist) { + return nil, exists, err + } + g, err := grove.New(path) + if err != nil { + return nil, false, err + } + return g, exists, nil +} + +// useMemory will return an empty memory store. +func useMemory(prefix string) (s forest.Store, exists bool, err error) { + return store.NewMemoryStore(), false, nil +} -- 2.30.0.windows.1