Patchset (2 patches):
  [1/2] doc: explain store methods
  [2/2] feat: orchard store
23 files changed, 2396 insertions(+), 94 deletions(-)
Hey Jack,

Thanks so much for working on this. It's been a huge effort, but it's looking really good. I do have some questions/comments/recommendations, but I think they shouldn't be terribly painful.

- The docs for Orchard.RecentReplies (and similar methods) say that they return nodes older than the provided timestamp, but the timestamp parameter is named "earliest". This seems backwards to me. Should that not be called "before", or "latest"?
- The RecentReplies implementation appears to unconditionally iterate over all replies. I think it would be relatively straightforward to make it exit once it has collected the required number of replies (without finishing the iteration), and I'd hope it's possible for us to seek to the requested start time without iterating from the beginning (a rough sketch of the early-exit part follows this message). The seeking might be complex, though, and I'd be happy to defer that work for later if that's the case.
- Orchard.ChildrenBatched should document the children's iteration order in its godoc somewhere.
- TestChildrenBatched has three commented-out test cases. Are they obsolete, broken, or something else?
- signer_test.go got reformatted in this patch, but no logical changes were made. I don't mind the reformatting, but I'd prefer for cosmetic changes like that to be separated out to reduce the size and noise level of the patch.
- Both the tests in store and in orchard have a setup function that does pretty similar stuff. I think you can likely unify those without too much pain (the second sketch below shows one possible shape). Do you think that's worthwhile?

Also, I'm struggling with the tests. I had to apply this patch to make them run at all, and they're still failing for me (Linux):

```
diff --git a/store/store_test.go b/store/store_test.go
index e6796b9..d8a69e9 100644
--- a/store/store_test.go
+++ b/store/store_test.go
@@ -713,7 +713,7 @@ func (ids IDList) Contains(id *fields.QualifiedHash) bool {
 // The Grove is used as the reference implementation, which the Orchard is tested against.
 // After a test run, the 'orchard.db' file is moved to a known location for inspection.
 func setup(t *testing.T, prefix string) (*orchard.Orchard, *grove.Grove) {
-	tmp, err := ioutil.TempDir("forest_go_test", "")
+	tmp, err := ioutil.TempDir("", "forest_go_test")
 	if err != nil {
 		t.Skipf("error creating testing directory: %v", err)
 	}
@@ -721,10 +721,10 @@ func setup(t *testing.T, prefix string) (*orchard.Orchard, *grove.Grove) {
 		orchardPath = filepath.Join(tmp, "orchard_test.db")
 		grovePath   = filepath.Join(tmp, "grove")
 	)
-	if err := os.MkdirAll(filepath.Dir(orchardPath), 0644); err != nil {
+	if err := os.MkdirAll(filepath.Dir(orchardPath), 0755); err != nil {
 		t.Skipf("preparing orchard file: %v", err)
 	}
-	if err := os.MkdirAll(grovePath, 0644); err != nil {
+	if err := os.MkdirAll(grovePath, 0755); err != nil {
 		t.Skipf("preparing grove directory: %v", err)
 	}
 	s, err := orchard.Open(orchardPath)
@@ -753,7 +753,7 @@ func setup(t *testing.T, prefix string) (*orchard.Orchard, *grove.Grove) {
 	if err := os.RemoveAll(target); err != nil {
 		return fmt.Errorf("cleaning target path: %w", err)
 	}
-	if err := os.MkdirAll(target, 0644); err != nil {
+	if err := os.MkdirAll(target, 0755); err != nil {
 		return fmt.Errorf("creating target path: %w", err)
 	}
 	if err := copyFile(orchardPath, filepath.Join(target, name)); err != nil {
```

I can dig into why the tests are failing tomorrow; I didn't have time tonight. I suspect it's permissions somehow, but I'm not sure yet. Just wanted to get you feedback as soon as I could.

Thanks again, I think this is almost ready to merge.

Cheers,
Chris
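A rough sketch of the early-exit loop suggested above, written against the ReplyCursor and fields types from this patchset. The helper name collectRecent is invented for illustration, and the sketch assumes it lives in the orchard package; the seek-to-timestamp optimization would additionally need something like bolt's Cursor.Seek exposed on indexCursor, which is not shown here:

```go
package orchard

import (
	"fmt"
	"io"

	"git.sr.ht/~whereswaldon/forest-go"
	"git.sr.ht/~whereswaldon/forest-go/fields"
)

// collectRecent drains up to q replies older than earliest from a ReplyCursor,
// stopping as soon as q replies have been collected rather than always walking
// the entire index.
func collectRecent(c *ReplyCursor, earliest fields.Timestamp, q int) ([]forest.Reply, error) {
	var replies []forest.Reply
	for r, err := c.First(); len(replies) < q; r, err = c.Next() {
		if err == io.EOF {
			break // index exhausted before q replies were found
		}
		if err != nil {
			return replies, fmt.Errorf("reading reply: %w", err)
		}
		if earliest > 0 && r.Created < earliest {
			replies = append(replies, r)
		}
	}
	return replies, nil
}
```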
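And a rough sketch of the kind of unified test setup helper meant above. Everything here is hypothetical: the package placement (testutil would need to be importable from both test suites without an import cycle), the helper name, and the teardown. It also omits details the real setup functions have, such as the unsafe codec override and archiving the database file for inspection:

```go
package testutil

import (
	"io/ioutil"
	"os"
	"path/filepath"
	"testing"

	"git.sr.ht/~whereswaldon/forest-go"
	"git.sr.ht/~whereswaldon/forest-go/orchard"
)

// NewTestOrchard opens an Orchard backed by a temporary database file, seeds
// it with the provided nodes, and removes the file when the test finishes.
func NewTestOrchard(t *testing.T, nodes ...forest.Node) *orchard.Orchard {
	t.Helper()
	tmp, err := ioutil.TempDir("", "forest_go_test")
	if err != nil {
		t.Skipf("creating testing directory: %v", err)
	}
	o, err := orchard.Open(filepath.Join(tmp, "orchard_test.db"))
	if err != nil {
		t.Fatalf("opening orchard: %v", err)
	}
	for _, n := range nodes {
		if err := o.Add(n); err != nil {
			t.Fatalf("adding node to store: %v", err)
		}
	}
	t.Cleanup(func() {
		if err := o.Close(); err != nil {
			t.Logf("closing database file: %v", err)
		}
		if err := os.RemoveAll(tmp); err != nil {
			t.Logf("removing temporary files: %v", err)
		}
	})
	return o
}
```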
Copy & paste the following snippet into your terminal to import this patchset into git:
curl -s https://lists.sr.ht/~whereswaldon/arbor-dev/patches/22650/mbox | git am -3
From: Jack Mordaunt <jackmordaunt@gmail.com>

---
 store.go              | 10 +++++++++-
 store/memory-store.go |  7 ++++++-
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/store.go b/store.go
index 716be7f..82a206e 100644
--- a/store.go
+++ b/store.go
@@ -4,13 +4,21 @@ import (
 	"git.sr.ht/~whereswaldon/forest-go/fields"
 )
 
+// Store describes a collection of `forest.Node`.
 type Store interface {
+	// CopyInto allows transformations between Store instances.
 	CopyInto(Store) error
+	// Get retrieves a node by ID.
 	Get(*fields.QualifiedHash) (Node, bool, error)
+	// GetIdentity retrieves an identity node by ID.
 	GetIdentity(*fields.QualifiedHash) (Node, bool, error)
+	// GetCommunity retrieves a community node by ID.
 	GetCommunity(*fields.QualifiedHash) (Node, bool, error)
+	// GetConversation retrieves a conversation node by ID.
 	GetConversation(communityID, conversationID *fields.QualifiedHash) (Node, bool, error)
+	// GetReply retrieves a reply node by ID.
 	GetReply(communityID, conversationID, replyID *fields.QualifiedHash) (Node, bool, error)
+	// Children returns a list of child nodes for the given node ID.
 	Children(*fields.QualifiedHash) ([]*fields.QualifiedHash, error)
 	// Recent returns recently-created (as per the timestamp in the node) nodes.
 	// It may return both a slice of nodes and an error if some nodes in the
@@ -19,6 +27,6 @@ type Store interface {
 	// Add inserts a node into the store. It is *not* an error to insert a node which is already
 	// stored. Implementations must not return an error in this case.
 	Add(Node) error
-
+	// RemoveSubtree from the store.
 	RemoveSubtree(*fields.QualifiedHash) error
 }
diff --git a/store/memory-store.go b/store/memory-store.go
index 0e23ae5..ae35d73 100644
--- a/store/memory-store.go
+++ b/store/memory-store.go
@@ -8,13 +8,18 @@ import (
 	"git.sr.ht/~whereswaldon/forest-go/fields"
 )
 
+// MemoryStore is an in-memory node store.
 type MemoryStore struct {
-	Items map[string]forest.Node
+	// Items is a flat map of all nodes.
+	Items map[string]forest.Node
+	// ChildMap describes the parent-child relationship between nodes.
+	// Each list of child nodes is keyed by the parent node.
 	ChildMap map[string][]string
 }
 
 var _ forest.Store = &MemoryStore{}
 
+// NewMemoryStore allocates a MemoryStore.
 func NewMemoryStore() *MemoryStore {
 	return &MemoryStore{
 		Items: make(map[string]forest.Node),
-- 
2.30.0.windows.1
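As an aside on the first patch: with CopyInto documented as the way to move nodes between Store implementations, a small example of what it enables is a store-to-store migration. The migrate helper below is invented for illustration; Store, MemoryStore, and NewMemoryStore are real names from this repository:

```go
package main

import (
	"fmt"

	"git.sr.ht/~whereswaldon/forest-go"
	"git.sr.ht/~whereswaldon/forest-go/store"
)

// migrate copies every node from src into dst via the Store interface.
// With the second patch applied, the same call could move a Grove's
// contents into an Orchard.
func migrate(src, dst forest.Store) error {
	if err := src.CopyInto(dst); err != nil {
		return fmt.Errorf("migrating store: %w", err)
	}
	return nil
}

func main() {
	src := store.NewMemoryStore()
	dst := store.NewMemoryStore()
	if err := migrate(src, dst); err != nil {
		fmt.Println("migration failed:", err)
	}
}
```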
From: Jack Mordaunt <jackmordaunt@gmail.com> Orchard is an implementation of `forest.Store` using boltdb: a single file database. It boasts better performance and scalability than the Grove store. --- builder.go | 17 +- fields/primitives.go | 28 +- go.mod | 7 +- go.sum | 8 +- grove/child-cache.go | 3 + grove/grove.go | 13 +- nodes.go | 27 +- orchard/cursor.go | 206 ++++++++++ orchard/index.go | 67 ++++ orchard/internal/codec/codec.go | 90 +++++ orchard/internal/mock/mock.go | 144 +++++++ orchard/orchard.go | 661 ++++++++++++++++++++++++++++++++ orchard/orchard_test.go | 447 +++++++++++++++++++++ serialize/serializer.go | 96 +++-- signer_test.go | 22 +- store.go | 24 +- store/archive.go | 5 +- store/cache-store.go | 11 +- store/store_test.go | 548 +++++++++++++++++++++++++- testkeys/testkeys.go | 15 + testutil/node_utils.go | 34 ++ 21 files changed, 2381 insertions(+), 92 deletions(-) create mode 100644 orchard/cursor.go create mode 100644 orchard/index.go create mode 100644 orchard/internal/codec/codec.go create mode 100644 orchard/internal/mock/mock.go create mode 100644 orchard/orchard.go create mode 100644 orchard/orchard_test.go diff --git a/builder.go b/builder.go index 2058ab0..8a0a466 100644 --- a/builder.go @@ -212,7 +212,7 @@ func NewIdentityQualified(signer Signer, name *fields.QualifiedContent, metadata if err != nil { return nil, err } - identity.id = fields.Blob(id) + identity.Identifier = fields.Blob(id) return identity, nil } @@ -221,6 +221,7 @@ func NewIdentityQualified(signer Signer, name *fields.QualifiedContent, metadata type Builder struct { User *Identity Signer + Timer func() time.Time } // As creates a Builder that can write new nodes on behalf of the provided user. @@ -231,9 +232,17 @@ func As(user *Identity, signer Signer) *Builder { return &Builder{ User: user, Signer: signer, + Timer: func() time.Time { return time.Now() }, } } +func (b *Builder) Now() time.Time { + if b.Timer != nil { + return b.Timer() + } + return time.Now() +} + // NewCommunity creates a community node (signed by the given identity with the given privkey). 
func (n *Builder) NewCommunity(name string, metadata []byte) (*Community, error) { qname, err := fields.NewQualifiedContent(fields.ContentTypeUTF8String, []byte(name)) @@ -288,7 +297,7 @@ func (n *Builder) NewCommunityQualified(name *fields.QualifiedContent, metadata if err != nil { return nil, err } - c.id = fields.Blob(id) + c.Identifier = fields.Blob(id) return c, nil } @@ -310,7 +319,7 @@ func (n *Builder) NewReplyQualified(parent interface{}, content, metadata *field r := newReply() r.Version = fields.CurrentVersion r.Type = fields.NodeTypeReply - r.Created = fields.TimestampFrom(time.Now()) + r.Created = fields.TimestampFrom(n.Now()) switch concreteParent := parent.(type) { case *Community: r.CommunityID = *concreteParent.ID() @@ -360,7 +369,7 @@ func (n *Builder) NewReplyQualified(parent interface{}, content, metadata *field if err != nil { return nil, err } - r.id = fields.Blob(id) + r.Identifier = fields.Blob(id) return r, nil } diff --git a/fields/primitives.go b/fields/primitives.go index 9782b70..74e9f3e 100644 --- a/fields/primitives.go +++ b/fields/primitives.go @@ -18,9 +18,9 @@ const ( HashDigestLengthSHA512_256 ContentLength = 32 ) -// multiByteSerializationOrder defines the order in which multi-byte +// MultiByteSerializationOrder defines the order in which multi-byte // integers are serialized into binary -var multiByteSerializationOrder binary.ByteOrder = binary.BigEndian +var MultiByteSerializationOrder binary.ByteOrder = binary.BigEndian // fundamental types type genericType uint8 @@ -29,13 +29,13 @@ const sizeofgenericType = 1 func (g genericType) MarshalBinary() ([]byte, error) { b := new(bytes.Buffer) - err := binary.Write(b, multiByteSerializationOrder, g) + err := binary.Write(b, MultiByteSerializationOrder, g) return b.Bytes(), err } func (g *genericType) UnmarshalBinary(b []byte) error { buf := bytes.NewBuffer(b) - return binary.Read(buf, multiByteSerializationOrder, g) + return binary.Read(buf, MultiByteSerializationOrder, g) } func (g *genericType) BytesConsumed() int { @@ -68,7 +68,7 @@ func NewContentLength(size int) (*ContentLength, error) { // MarshalBinary converts the ContentLength into its binary representation func (c ContentLength) MarshalBinary() ([]byte, error) { b := new(bytes.Buffer) - err := binary.Write(b, multiByteSerializationOrder, c) + err := binary.Write(b, MultiByteSerializationOrder, c) return b.Bytes(), err } @@ -90,7 +90,7 @@ func (c *ContentLength) UnmarshalText(b []byte) error { // back to its structured form func (c *ContentLength) UnmarshalBinary(b []byte) error { buf := bytes.NewBuffer(b) - return binary.Read(buf, multiByteSerializationOrder, c) + return binary.Read(buf, MultiByteSerializationOrder, c) } func (c *ContentLength) BytesConsumed() int { @@ -109,7 +109,7 @@ const sizeofTreeDepth = 4 // MarshalBinary converts the TreeDepth into its binary representation func (t TreeDepth) MarshalBinary() ([]byte, error) { b := new(bytes.Buffer) - err := binary.Write(b, multiByteSerializationOrder, t) + err := binary.Write(b, MultiByteSerializationOrder, t) return b.Bytes(), err } @@ -121,7 +121,7 @@ func (t TreeDepth) MarshalText() ([]byte, error) { // back to its structured form func (t *TreeDepth) UnmarshalBinary(b []byte) error { buf := bytes.NewBuffer(b) - return binary.Read(buf, multiByteSerializationOrder, t) + return binary.Read(buf, MultiByteSerializationOrder, t) } func (t *TreeDepth) BytesConsumed() int { @@ -190,7 +190,7 @@ const sizeofVersion = 2 // MarshalBinary converts the Version into its binary representation func (v 
Version) MarshalBinary() ([]byte, error) { b := new(bytes.Buffer) - err := binary.Write(b, multiByteSerializationOrder, v) + err := binary.Write(b, MultiByteSerializationOrder, v) return b.Bytes(), err } @@ -202,7 +202,7 @@ func (v Version) MarshalText() ([]byte, error) { // back to its structured form func (v *Version) UnmarshalBinary(b []byte) error { buf := bytes.NewBuffer(b) - return binary.Read(buf, multiByteSerializationOrder, v) + return binary.Read(buf, MultiByteSerializationOrder, v) } func (v *Version) BytesConsumed() int { @@ -234,7 +234,7 @@ func (t Timestamp) Time() time.Time { // MarshalBinary converts the Timestamp into its binary representation func (v Timestamp) MarshalBinary() ([]byte, error) { b := new(bytes.Buffer) - err := binary.Write(b, multiByteSerializationOrder, v) + err := binary.Write(b, MultiByteSerializationOrder, v) return b.Bytes(), err } @@ -246,7 +246,7 @@ func (v Timestamp) MarshalText() ([]byte, error) { // back to its structured form func (v *Timestamp) UnmarshalBinary(b []byte) error { buf := bytes.NewBuffer(b) - return binary.Read(buf, multiByteSerializationOrder, v) + return binary.Read(buf, MultiByteSerializationOrder, v) } func (v *Timestamp) BytesConsumed() int { @@ -306,6 +306,10 @@ func (t *NodeType) Equals(t2 *NodeType) bool { return ((*genericType)(t)).Equals((*genericType)(t2)) } +func (t NodeType) String() string { + return NodeTypeNames[t] +} + type HashType genericType const ( diff --git a/go.mod b/go.mod index b5fada4..aa3a710 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,11 @@ module git.sr.ht/~whereswaldon/forest-go -require golang.org/x/crypto v0.0.0-20201016220609-9e8e0b390897 +require ( + github.com/boltdb/bolt v1.3.1 + github.com/shamaton/msgpack v1.2.1 + golang.org/x/crypto v0.0.0-20210415154028-4f45737414dc + golang.org/x/sys v0.0.0-20210415045647-66c3f260301c // indirect +) replace golang.org/x/crypto => github.com/ProtonMail/crypto v0.0.0-20201022141144-3fe6b6992c0f diff --git a/go.sum b/go.sum index 30bdd5c..703269f 100644 --- a/go.sum +++ b/go.sum @@ -1,7 +1,11 @@ github.com/ProtonMail/crypto v0.0.0-20201022141144-3fe6b6992c0f h1:CrqdTsoF7teMqQok+iHUx3yjYJfkpDuU7y/nIxRJ2rY= github.com/ProtonMail/crypto v0.0.0-20201022141144-3fe6b6992c0f/go.mod h1:Pxr7w4gA2ikI4sWyYwEffm+oew1WAJHzG1SiDpQMkrI= +github.com/boltdb/bolt v1.3.1 h1:JQmyP4ZBrce+ZQu0dY660FMfatumYDLun9hBCUVIkF4= +github.com/boltdb/bolt v1.3.1/go.mod h1:clJnj/oiGkjum5o1McbSZDSLxVThjynRyGBgiAx27Ps= +github.com/shamaton/msgpack v1.2.1 h1:40cwW7YAEdOIxcxIsUkAxSMUyYWZUyNiazI5AyiBntI= +github.com/shamaton/msgpack v1.2.1/go.mod h1:ibiaNQRTCUISAYkkyOpaSCEBiCAxXe6u6Mu1sQ6945U= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/sys v0.0.0-20190412213103-97732733099d h1:+R4KGOnez64A81RvjARKc4UT5/tI9ujCIVX+P5KiHuI= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= +golang.org/x/sys v0.0.0-20210415045647-66c3f260301c h1:6L+uOeS3OQt/f4eFHXZcTxeZrGCuz+CLElgEBjbcTA4= +golang.org/x/sys v0.0.0-20210415045647-66c3f260301c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= diff --git a/grove/child-cache.go b/grove/child-cache.go index caf9c07..792dd4d 100644 --- a/grove/child-cache.go +++ b/grove/child-cache.go @@ -35,6 +35,9 @@ func (c *ChildCache) Get(parent *fields.QualifiedHash) ([]*fields.QualifiedHash, if 
!inMap { return nil, false } + if len(submap) == 0 { + return nil, true + } out := make([]*fields.QualifiedHash, 0, len(submap)) for _, child := range submap { out = append(out, child) diff --git a/grove/grove.go b/grove/grove.go index 70151b3..3b39fc3 100644 --- a/grove/grove.go +++ b/grove/grove.go @@ -306,7 +306,7 @@ func (g *Grove) Children(id *fields.QualifiedHash) ([]*fields.QualifiedHash, err } children, inCache = g.ChildCache.Get(id) if !inCache { - return []*fields.QualifiedHash{}, nil + return nil, nil } return children, nil @@ -500,7 +500,16 @@ func (g *Grove) GetReply(communityID, conversationID, replyID *fields.QualifiedH // of the functionality that can be implemented simply. However, it is // implementable, and should be done as soon as is feasible. func (g *Grove) CopyInto(other forest.Store) error { - return fmt.Errorf("method CopyInto() is not currently implemented on Grove") + nodes, err := g.allNodes() + if err != nil { + return err + } + for _, n := range nodes { + if err := other.Add(n); err != nil { + return fmt.Errorf("copying node: %w", err) + } + } + return nil } // RemoveSubtree removes the subtree rooted at the node diff --git a/nodes.go b/nodes.go index d340dcd..455bb61 100644 --- a/nodes.go +++ b/nodes.go @@ -43,6 +43,19 @@ func ValidateNode(n Node, store Store) error { return nil } +// Is queries the node for it's concrete type. +func Is(nt fields.NodeType, n Node) bool { + switch n.(type) { + case *Identity: + return nt == fields.NodeTypeIdentity + case *Community: + return nt == fields.NodeTypeCommunity + case *Reply: + return nt == fields.NodeTypeReply + } + return false +} + // NodeTypeOf returns the NodeType of the provided binary-marshaled node. // If the provided bytes are not a forest node or the type cannot be determined, // an error will be returned and the first return value must be ignored. @@ -87,8 +100,8 @@ type SchemaInfo struct { // generic node type CommonNode struct { - // the ID is deterministically computed from the rest of the values - id fields.Blob + // Deterministically computed from the rest of the values. 
+ Identifier fields.Blob SchemaInfo `arbor:"order=0,recurse=always"` Parent fields.QualifiedHash `arbor:"order=1,recurse=serialize"` IDDesc fields.HashDescriptor `arbor:"order=2,recurse=always"` @@ -102,7 +115,7 @@ type CommonNode struct { func (n CommonNode) ID() *fields.QualifiedHash { return &fields.QualifiedHash{ Descriptor: n.IDDesc, - Blob: n.id, + Blob: n.Identifier, } } @@ -111,7 +124,7 @@ func (n CommonNode) CreatedAt() time.Time { } func (n CommonNode) ParentID() *fields.QualifiedHash { - return &fields.QualifiedHash{n.Parent.Descriptor, n.Parent.Blob} + return &fields.QualifiedHash{Descriptor: n.Parent.Descriptor, Blob: n.Parent.Blob} } func (n CommonNode) TreeDepth() fields.TreeDepth { @@ -270,7 +283,7 @@ func (i *Identity) UnmarshalBinary(b []byte) error { if err != nil { return err } - i.id, err = computeID(i) + i.Identifier, err = computeID(i) return err } @@ -352,7 +365,7 @@ func (c *Community) UnmarshalBinary(b []byte) error { if err != nil { return err } - c.id, err = computeID(c) + c.Identifier, err = computeID(c) return err } @@ -440,7 +453,7 @@ func (r *Reply) UnmarshalBinary(b []byte) error { if err != nil { return err } - r.id, err = computeID(r) + r.Identifier, err = computeID(r) return err } diff --git a/orchard/cursor.go b/orchard/cursor.go new file mode 100644 index 0000000..b509428 --- /dev/null +++ b/orchard/cursor.go @@ -0,0 +1,206 @@ +package orchard + +import ( + "fmt" + "io" + + "git.sr.ht/~whereswaldon/forest-go" + "git.sr.ht/~whereswaldon/forest-go/orchard/internal/codec" + "github.com/boltdb/bolt" +) + +// ByteCursor is a cursor that iterates over []byte kv pairs. +type ByteCursor interface { + First() ([]byte, []byte) + Next() ([]byte, []byte) +} + +// indexCursor uses an index bucket to iterate over the parent bucket. +// Collided index entries are flattened in the order they were inserted. +type indexCursor struct { + // Parent bucket contains the real values being indexed. + Parent *bolt.Bucket + // Index bucket contains the index values, which are some arbitrary key + // mapped to entries in the Parent bucket. + Index *bolt.Bucket + // Reverse iterates backward, if true. + Reverse bool + + // cursor holds the current state of the bucket cursor. + cursor *bolt.Cursor + // collisions holds the current state of the collision bucket cursor. + // If collisions is not nil, we are iterating over collided entries for a + // given key. + collisions *bolt.Cursor +} + +// First returns the first k,v pair. +// This method initializes the underlying cursor and must be called before calls +// to `indexCursor.Next`. +func (c *indexCursor) First() ([]byte, []byte) { + if c.Index == nil { + return nil, nil + } + c.cursor = c.Index.Cursor() + k, v := c.first(c.cursor) + if k == nil { + return nil, nil + } + if v != nil { + id := v + return id, c.Parent.Get(id) + } + if b := c.Index.Bucket(k); b != nil { + c.collisions = b.Cursor() + _, id := c.first(c.collisions) + return id, c.Parent.Get(id) + } + return nil, nil +} + +// Next returns the next k,v pair until k is nil. 
+func (c *indexCursor) Next() ([]byte, []byte) { + var cursor = c.cursor + if c.collisions != nil { + cursor = c.collisions + } + k, v := c.next(cursor) + if k == nil { + if c.collisions != nil { + c.collisions = nil + _, id := c.next(c.cursor) + return id, c.Parent.Get(id) + } + return nil, nil + } + if v != nil { + id := v + return id, c.Parent.Get(id) + } + if b := c.Index.Bucket(k); b != nil { + c.collisions = b.Cursor() + _, id := c.first(c.collisions) + return id, c.Parent.Get(id) + } + return nil, nil +} + +func (c *indexCursor) next(cursor *bolt.Cursor) ([]byte, []byte) { + if c.Reverse { + return cursor.Prev() + } + return cursor.Next() +} + +func (c *indexCursor) first(cursor *bolt.Cursor) ([]byte, []byte) { + if c.Reverse { + return cursor.Last() + } + return cursor.First() +} + +/* + NOTE(jfm): The following cursor types are structurally identical, but with different return types. +*/ + +// ReplyCursor wraps byte cursor and decodes into `forest.Reply` values. +type ReplyCursor struct { + Inner ByteCursor + Codec *codec.Default +} + +// First returns the first reply or EOF if there are no. +func (c *ReplyCursor) First() (r forest.Reply, err error) { + k, v := c.Inner.First() + if k == nil { + return r, io.EOF + } + data := make([]byte, len(v)) + copy(data, v) + if err := c.Codec.Decode(data, &r); err != nil { + return r, fmt.Errorf("decoding reply: %w", err) + } + return r, nil +} + +// Next returns the next reply or EOF if there are no more replies. +func (c *ReplyCursor) Next() (r forest.Reply, err error) { + k, v := c.Inner.Next() + if k == nil { + return r, io.EOF + } + data := make([]byte, len(v)) + copy(data, v) + if err := c.Codec.Decode(data, &r); err != nil { + return r, fmt.Errorf("decoding reply: %w", err) + } + return r, nil +} + +// IdentityCursor wraps byte cursor and decodes into `forest.Identity` values. +type IdentityCursor struct { + Inner ByteCursor + Codec *codec.Default +} + +// First returns the first identity or EOF if there are no. +func (c *IdentityCursor) First() (r forest.Identity, err error) { + k, v := c.Inner.First() + if k == nil { + return r, io.EOF + } + data := make([]byte, len(v)) + copy(data, v) + if err := c.Codec.Decode(data, &r); err != nil { + return r, fmt.Errorf("decoding identity: %w", err) + } + return r, nil +} + +// Next returns the next identity or EOF if there are no more. +func (c *IdentityCursor) Next() (r forest.Identity, err error) { + k, v := c.Inner.Next() + if k == nil { + return r, io.EOF + } + data := make([]byte, len(v)) + copy(data, v) + if err := c.Codec.Decode(data, &r); err != nil { + return r, fmt.Errorf("decoding identity: %w", err) + } + return r, nil +} + +// CommunityCursor wraps byte cursor and decodes into `forest.Community` values. +type CommunityCursor struct { + Inner ByteCursor + Codec *codec.Default +} + +// First returns the first community or EOF if there are no. +func (c *CommunityCursor) First() (r forest.Community, err error) { + k, v := c.Inner.First() + if k == nil { + return r, io.EOF + } + data := make([]byte, len(v)) + copy(data, v) + if err := c.Codec.Decode(data, &r); err != nil { + return r, fmt.Errorf("decoding community: %w", err) + } + return r, nil +} + +// Next returns the next community or EOF if there are no more. 
+func (c *CommunityCursor) Next() (r forest.Community, err error) { + k, v := c.Inner.Next() + if k == nil { + return r, io.EOF + } + data := make([]byte, len(v)) + copy(data, v) + if err := c.Codec.Decode(data, &r); err != nil { + return r, fmt.Errorf("decoding community: %w", err) + } + return r, nil +} diff --git a/orchard/index.go b/orchard/index.go new file mode 100644 index 0000000..25e6428 --- /dev/null +++ b/orchard/index.go @@ -0,0 +1,67 @@ +package orchard + +import ( + "fmt" + + "git.sr.ht/~whereswaldon/forest-go/fields" + "github.com/boltdb/bolt" +) + +// index maps keys to node IDs for accelerated queries. +// On collision, all values for the collided key are placed into a bucket. +// Must be used inside a transaction. +type index struct { + *bolt.Bucket +} + +// Put a kv pair into the index. +// If an entry for k exists, all values are inserted into a bucket for that +// key. +func (idx index) Put(k []byte, v []byte) error { + nextID := func(b *bolt.Bucket) ([]byte, error) { + // Generate a unique ID for this sub-entry. + // ID is not used other than by the database for purposes of + // sorting and iterating. + kint, err := b.NextSequence() + if err != nil { + return nil, err + } + k := make([]byte, 8) + fields.MultiByteSerializationOrder.PutUint64(k, kint) + return k, nil + } + if collision := idx.Bucket.Get(k); collision != nil { + // Move any existing value into a collision bucket. + if err := idx.Bucket.Delete(k); err != nil { + return fmt.Errorf("failed deleting colliding entry: %w", err) + } + collisions, err := idx.Bucket.CreateBucketIfNotExists(k) + if err != nil { + return fmt.Errorf("failed creating new bucket for key: %w", err) + } + k, err := nextID(collisions) + if err != nil { + return fmt.Errorf("failed generating next id: %w", err) + } + if err := collisions.Put(k, collision); err != nil { + return fmt.Errorf("failed inserting into new bucket: %w", err) + } + } + if collisions := idx.Bucket.Bucket(k); collisions != nil { + // Place the new value into the collisions bucket. + k, err := nextID(collisions) + if err != nil { + return fmt.Errorf("failed generating next id: %w", err) + } + if err := collisions.Put(k, v); err != nil { + return fmt.Errorf("failed inserting into collision bucket: %w", err) + } + return nil + } + // No collision, place value into the root bucket. + + if err := idx.Bucket.Put(k, v); err != nil { + return fmt.Errorf("failed inserting into root bucket: %w", err) + } + return nil +} diff --git a/orchard/internal/codec/codec.go b/orchard/internal/codec/codec.go new file mode 100644 index 0000000..1c96562 --- /dev/null +++ b/orchard/internal/codec/codec.go @@ -0,0 +1,90 @@ +package codec + +import ( + "encoding" + "reflect" + "sync" + + "git.sr.ht/~whereswaldon/forest-go/serialize" + "github.com/shamaton/msgpack" +) + +// Codec can encode and decode forest types. +type Codec interface { + Encode(v interface{}) ([]byte, error) + Decode(b []byte, v interface{}) error +} + +// Default initializes to the safe Arbor codec, unless overriden by internal +// code such as test code. 
+type Default struct { + Inner Codec + sync.Once +} + +func (d *Default) Encode(v interface{}) ([]byte, error) { + d.Once.Do(func() { + if d.Inner == nil { + d.Inner = Arbor{} + } + }) + return d.Inner.Encode(v) +} +func (d *Default) Decode(b []byte, v interface{}) error { + d.Once.Do(func() { + if d.Inner == nil { + d.Inner = Arbor{} + } + }) + return d.Inner.Decode(b, v) +} + +// Arbor serializes forest types using the safe arbor serialization that +// validates the data. +type Arbor struct{} + +func (ac Arbor) Encode(v interface{}) ([]byte, error) { + buf, err := serialize.ArborSerialize(reflect.ValueOf(v)) + if err != nil { + if b, ok := v.(encoding.BinaryMarshaler); ok { + return b.MarshalBinary() + } + return nil, err + } + return buf, nil +} + +func (ac Arbor) Decode(by []byte, v interface{}) error { + if _, err := serialize.ArborDeserialize(reflect.ValueOf(v), by); err != nil { + if b, ok := v.(encoding.BinaryUnmarshaler); ok { + return b.UnmarshalBinary(by) + } + return err + } + return nil +} + +// Unsafe serializes forest types without validating them for the purposes of +// fast testing. +// +// This avoids branches within the orchard code that could lead to nodes +// not being validated. +// +// NOTE(jfm) msgpack is used because stdlib encodings have coincidental issues: +// +// - Gob calls into encoding.BinaryMarshaler method set, which calls the +// node validation code paths. +// +// - JSON encodes types like fields.Version into an escaped hexidecimal string, +// but refuses to decode back into fields.Version without some help. +// +// In an attempt to avoid touching the fields types, I've opted to go with a +// encoding that works "out of the box". +type Unsafe struct{} + +func (Unsafe) Encode(v interface{}) ([]byte, error) { + return msgpack.Marshal(v) +} +func (Unsafe) Decode(b []byte, v interface{}) error { + return msgpack.Unmarshal(b, v) +} diff --git a/orchard/internal/mock/mock.go b/orchard/internal/mock/mock.go new file mode 100644 index 0000000..4aafd2d --- /dev/null +++ b/orchard/internal/mock/mock.go @@ -0,0 +1,144 @@ +// Package mock uses mocked structures to filter out non-essential data to clarify testing. +// These mocked types are not valid data, and can only be used during testing where data is +// unvalidated. +package mock + +import ( + "git.sr.ht/~whereswaldon/forest-go" + "git.sr.ht/~whereswaldon/forest-go/fields" +) + +const ( + TypeReply fields.NodeType = fields.NodeTypeReply + TypeIdentity fields.NodeType = fields.NodeTypeIdentity + TypeCommunity fields.NodeType = fields.NodeTypeCommunity +) + +// Node mocks out `forest.Node`. +// Resolves to a non-validated `forest.Node`. +type Node struct { + Type fields.NodeType + ID string + Created fields.Timestamp + Parent ID +} + +func (m Node) Into() forest.Node { + // @todo type switch on node type. 
+ return &forest.Reply{ + CommonNode: forest.CommonNode{ + SchemaInfo: forest.SchemaInfo{Type: fields.NodeTypeReply}, + Identifier: []byte(m.ID), + Created: m.Created, + Parent: *m.Parent.IntoQualifiedHash(), + }, + } +} + +func (m Node) IntoReply() *forest.Reply { + var id []byte + if len(m.ID) > 0 { + id = []byte(m.ID) + } + return &forest.Reply{ + CommonNode: forest.CommonNode{ + SchemaInfo: forest.SchemaInfo{Type: fields.NodeTypeReply}, + Identifier: id, + Created: m.Created, + Parent: *m.Parent.IntoQualifiedHash(), + }, + } +} + +func FromNode(n forest.Node) Node { + var t fields.NodeType + switch n := n.(type) { + case *forest.Reply: + t = n.Type + case *forest.Identity: + t = n.Type + case *forest.Community: + t = n.Type + } + return Node{ + Type: t, + ID: string(n.ID().Blob), + Created: fields.TimestampFrom(n.CreatedAt()), + Parent: FromQualifiedHash(n.ParentID()), + } +} + +func FromReply(r forest.Reply) Node { + return Node{ + Type: fields.NodeTypeReply, + ID: string(r.Identifier), + Created: r.Created, + } +} + +// Nodes is a list of mock nodes that can resolve to a list of non-validated +// `forest.Reply`. +type Nodes []Node + +func (list Nodes) Into() []forest.Node { + var nodes []forest.Node + for _, m := range list { + nodes = append(nodes, m.Into()) + } + return nodes +} + +func (list Nodes) IntoReplies() []forest.Reply { + var replies []forest.Reply + for _, m := range list { + r := m.IntoReply() + replies = append(replies, *r) + } + return replies +} + +func FromNodes(src []forest.Node) (list Nodes) { + for ii, n := range src { + if n == nil { + continue + } + list = append(list, FromNode(src[ii])) + } + return list +} + +func FromReplies(src []forest.Reply) (list Nodes) { + for ii := range src { + list = append(list, FromReply(src[ii])) + } + return list +} + +// ID mocks out `fields.QualifiedHash`. +type ID string + +func (id ID) IntoQualifiedHash() *fields.QualifiedHash { + return &fields.QualifiedHash{ + Blob: []byte(id), + } +} + +func FromQualifiedHash(h *fields.QualifiedHash) ID { + return ID(h.Blob) +} + +type IDs []ID + +func (list IDs) IntoQualifiedHashList() (hashes []*fields.QualifiedHash) { + for _, id := range list { + hashes = append(hashes, id.IntoQualifiedHash()) + } + return hashes +} + +func FromQualifiedHashList(hashes []*fields.QualifiedHash) (list IDs) { + for ii := range hashes { + list = append(list, FromQualifiedHash(hashes[ii])) + } + return list +} diff --git a/orchard/orchard.go b/orchard/orchard.go new file mode 100644 index 0000000..83272d6 --- /dev/null +++ b/orchard/orchard.go @@ -0,0 +1,661 @@ +// Package orchard implements a boltdb backed on-disk node store, satisfying the +// forest.Store interface. +// +// This database is a single file that serializes nodes into buckets. +// Various indexes are used to accelerate important queries. +// +// As a result of using boltdb, Orchard prefers read heavy workloads. +package orchard + +import ( + "fmt" + "io" + "time" + + "git.sr.ht/~whereswaldon/forest-go" + "git.sr.ht/~whereswaldon/forest-go/fields" + "git.sr.ht/~whereswaldon/forest-go/orchard/internal/codec" + "git.sr.ht/~whereswaldon/forest-go/store" + "github.com/boltdb/bolt" +) + +// Bucket contains the name of a bucket in bytes. +type Bucket []byte + +func (b Bucket) String() string { + return string(b) +} + +var ( + // Buckets are the storage primitive used by bolt, that contain a sorted + // list of key-value pairs. Each bucket is homogeneous. 
+ BucketReply Bucket = Bucket("Reply") + BucketIdentity Bucket = Bucket("Identity") + BucketCommunity Bucket = Bucket("Community") + + // Indexes are used to speed up queries. + IndexAge Bucket = Bucket("Age") + IndexType Bucket = Bucket("Type") + IndexChildren Bucket = Bucket("Children") + + Buckets []Bucket = []Bucket{ + BucketReply, + BucketIdentity, + BucketCommunity, + } + + Indexes []Bucket = []Bucket{ + IndexAge, + IndexType, + IndexChildren, + } +) + +// Orchard is a database-backed node store for `forest.Node`. +// Nodes are persisted as schema entities and can be queried as such. +type Orchard struct { + *bolt.DB + ReadCache *store.MemoryStore + + // codec zero value defaults to Arbor serialization. + // Can only be overriden by interal code. + codec codec.Default +} + +// Open a database file at the given path using the standard OS filesystem. +func Open(path string) (*Orchard, error) { + db, err := bolt.Open(path, 0660, nil) + if err != nil { + return nil, fmt.Errorf("opening database file: %w", err) + } + return Using(db) +} + +// Using allocates an Orchard using the provided database handle. +func Using(db *bolt.DB) (*Orchard, error) { + if err := db.Update(func(tx *bolt.Tx) error { + for _, b := range append(Buckets, Indexes...) { + _, err := tx.CreateBucketIfNotExists(b) + if err != nil { + return fmt.Errorf("init %s: %w", b, err) + } + } + return nil + }); err != nil { + return nil, fmt.Errorf("init database: %w", err) + } + return &Orchard{ + DB: db, + ReadCache: store.NewMemoryStore(), + }, nil +} + +// Add inserts the node into the orchard. If the given node is already in the +// orchard, Add will do nothing. It is not an error to insert a node more than +// once. +func (o *Orchard) Add(node forest.Node) error { + if _, ok, err := o.Get(node.ID()); err != nil { + return fmt.Errorf("checking existence of node: %w", err) + } else if ok { + return nil + } + id, err := o.codec.Encode(node.ID()) + if err != nil { + return fmt.Errorf("serializing node ID: %w", err) + } + v, err := o.codec.Encode(node) + if err != nil { + return fmt.Errorf("serializing node: %w", err) + } + typeIndex := func(tx *bolt.Tx, nt fields.NodeType) error { + v, err := o.codec.Encode(nt) + if err != nil { + return fmt.Errorf("failed encoding node type: %w", err) + } + return index{Bucket: tx.Bucket(IndexType)}.Put(id, v) + } + ageIndex := func(tx *bolt.Tx, nt fields.NodeType, ts fields.Timestamp) error { + k, err := o.codec.Encode(ts) + if err != nil { + return fmt.Errorf("failed encoding timestamp: %w", err) + } + b, err := tx.Bucket(IndexAge).CreateBucketIfNotExists(bucketFromNodeType(nt)) + if err != nil { + return fmt.Errorf("failed creating bucket: %w", err) + } + return index{Bucket: b}.Put(k, id) + } + childIndex := func(tx *bolt.Tx) error { + k, err := o.codec.Encode(node.ParentID()) + if err != nil { + return fmt.Errorf("failed encoding parent ID: %w", err) + } + return index{Bucket: tx.Bucket(IndexChildren)}.Put(k, id) + } + return o.DB.Update(func(tx *bolt.Tx) error { + switch n := node.(type) { + case *forest.Reply: + if err := tx.Bucket(BucketReply).Put(id, v); err != nil { + return fmt.Errorf("updating bucket: %w", err) + } + if err := typeIndex(tx, n.Type); err != nil { + return fmt.Errorf("updating Type index: %w", err) + } + if err := ageIndex(tx, n.Type, n.Created); err != nil { + return fmt.Errorf("updating Age index: %w", err) + } + + case *forest.Identity: + if err := tx.Bucket(BucketIdentity).Put(id, v); err != nil { + return fmt.Errorf("updating bucket: %w", err) + } + if err := 
typeIndex(tx, n.Type); err != nil { + return fmt.Errorf("updating Type index: %w", err) + } + if err := ageIndex(tx, n.Type, n.Created); err != nil { + return fmt.Errorf("updating Age index: %w", err) + } + case *forest.Community: + if err := tx.Bucket(BucketCommunity).Put(id, v); err != nil { + return fmt.Errorf("updating bucket: %w", err) + } + if err := typeIndex(tx, n.Type); err != nil { + return fmt.Errorf("updating Type index: %w", err) + } + if err := ageIndex(tx, n.Type, n.Created); err != nil { + return fmt.Errorf("updating Age index: %w", err) + } + } + return childIndex(tx) + }) +} + +// Get searches for a node with the given id. +// Present indicates whether the node exists, err indicates a failure to load it. +func (o *Orchard) Get(nodeID *fields.QualifiedHash) (node forest.Node, present bool, err error) { + if n, ok, _ := o.ReadCache.Get(nodeID); ok { + return n, ok, nil + } + defer func() { + if err == nil && node != nil { + _ = o.ReadCache.Add(node) + } + }() + var ( + nt fields.NodeType + u union + ) + id, err := o.codec.Encode(nodeID) + if err != nil { + return nil, false, fmt.Errorf("serializing node ID: %w", err) + } + return node, present, o.DB.View(func(tx *bolt.Tx) error { + v := tx.Bucket(IndexType).Get(id) + if v == nil { + node = nil + present = false + err = nil + return nil + } + present = true + if err := o.codec.Decode(v, &nt); err != nil && err != io.EOF { + return fmt.Errorf("loading node type: %w", err) + } + if err := o.codec.Decode(tx.Bucket(bucketFromNodeType(nt)).Get(id), &u); err != nil { + return fmt.Errorf("deserializing node: %w", err) + } + node = u.Node() + return nil + }) +} + +func (o *Orchard) GetIdentity(id *fields.QualifiedHash) (forest.Node, bool, error) { + return o.Get(id) +} + +func (o *Orchard) GetCommunity(id *fields.QualifiedHash) (forest.Node, bool, error) { + return o.Get(id) +} + +func (o *Orchard) GetConversation( + community *fields.QualifiedHash, + id *fields.QualifiedHash, +) (forest.Node, bool, error) { + return o.Get(id) +} + +func (o *Orchard) GetReply( + community *fields.QualifiedHash, + conversation *fields.QualifiedHash, + id *fields.QualifiedHash, +) (forest.Node, bool, error) { + return o.Get(id) +} + +// Children returns the IDs of all known child nodes of the specified ID. +func (o *Orchard) Children( + parent *fields.QualifiedHash, +) (ch []*fields.QualifiedHash, err error) { + k, err := o.codec.Encode(parent) + if err != nil { + return nil, fmt.Errorf("serializing parent ID: %w", err) + } + return ch, o.DB.View(func(tx *bolt.Tx) error { + if child := tx.Bucket(IndexChildren).Get(k); child != nil { + var id fields.QualifiedHash + if err := o.codec.Decode(child, &id); err != nil { + return fmt.Errorf("deserializing node ID: %w", err) + } + ch = append(ch, &id) + return nil + } + if b := tx.Bucket(IndexChildren).Bucket(k); b != nil { + c := b.Cursor() + for k, v := c.First(); k != nil; k, v = c.Next() { + var id fields.QualifiedHash + if err := o.codec.Decode(v, &id); err != nil { + return fmt.Errorf("deserializing node ID: %w", err) + } + ch = append(ch, &id) + } + } + return nil + }) +} + +// Recent returns a slice of nodes of a given type ordered by recency, youngest +// first. +// +// NOTE: this function may return both a valid slice of nodes and an error +// in the case that some nodes failed to be unmarshaled from disk, but others +// were successful. Calling code should always check whether the node list is +// empty before throwing it away. 
+func (o *Orchard) Recent(nt fields.NodeType, n int) (nodes []forest.Node, err error) { + var ( + data = make([][]byte, n) + b = bucketFromNodeType(nt) + ii = 0 + errors Errors + ) + if err := o.DB.View(func(tx *bolt.Tx) error { + c := indexCursor{ + Parent: tx.Bucket(b), + Index: tx.Bucket(IndexAge).Bucket(b), + Reverse: true, + } + for k, v := c.First(); k != nil && ii < n; k, v = c.Next() { + data[ii] = make([]byte, len(v)) + copy(data[ii], v) + ii++ + } + return nil + }); err != nil { + return nodes, fmt.Errorf("copying out node data: %w", err) + } + for ii := range data { + switch nt { + case fields.NodeTypeReply: + var n forest.Reply + if err := o.codec.Decode(data[ii], &n); err != nil { + errors = append(errors, fmt.Errorf("deserializing reply: %w", err)) + } else { + nodes = append(nodes, &n) + } + case fields.NodeTypeIdentity: + var n forest.Identity + if err := o.codec.Decode(data[ii], &n); err != nil { + errors = append(errors, fmt.Errorf("deserializing identity: %w", err)) + } else { + nodes = append(nodes, &n) + } + case fields.NodeTypeCommunity: + var n forest.Community + if err := o.codec.Decode(data[ii], &n); err != nil { + errors = append(errors, fmt.Errorf("deserializing community: %w", err)) + } else { + nodes = append(nodes, &n) + } + } + } + if len(errors) > 0 { + return nodes, errors + } + return nodes, nil +} + +// CopyInto copies all nodes from the store into the provided store. +func (o *Orchard) CopyInto(other forest.Store) error { + var data [][]byte + if err := o.DB.View(func(tx *bolt.Tx) error { + for _, b := range Buckets { + c := tx.Bucket(b).Cursor() + for k, v := c.First(); k != nil; k, v = c.Next() { + by := make([]byte, len(v)) + copy(by, v) + data = append(data, by) + } + } + return nil + }); err != nil { + return fmt.Errorf("copying out node data: %w", err) + } + for _, by := range data { + var node forest.Node + nt, err := forest.NodeTypeOf(by) + if err != nil { + return err + } + switch nt { + case fields.NodeTypeReply: + var n forest.Reply + if err := o.codec.Decode(by, &n); err != nil { + return fmt.Errorf("deserializing reply: %w", err) + } + node = &n + case fields.NodeTypeIdentity: + var n forest.Identity + if err := o.codec.Decode(by, &n); err != nil { + return fmt.Errorf("deserializing identity: %w", err) + } + node = &n + case fields.NodeTypeCommunity: + var n forest.Community + if err := o.codec.Decode(by, &n); err != nil { + return fmt.Errorf("deserializing community: %w", err) + } + node = &n + } + if err := other.Add(node); err != nil { + return fmt.Errorf("copying node: %w", err) + } + } + return nil +} + +// RemoveSubtree removes the subtree rooted at the node with the provided ID +// from the orchard. +func (o *Orchard) RemoveSubtree(id *fields.QualifiedHash) error { + node, ok, err := o.Get(id) + if err != nil { + return err + } + if !ok { + return nil + } + children, err := o.Children(id) + if err != nil { + return err + } + for _, child := range children { + if err := o.RemoveSubtree(child); err != nil { + return err + } + } + return o.delete(node) +} + +func (o *Orchard) delete(n forest.Node) error { + id, err := o.codec.Encode(n.ID()) + if err != nil { + return fmt.Errorf("serializing node ID: %w", err) + } + return o.DB.Update(func(tx *bolt.Tx) error { + return tx.Bucket(bucketFromNode(n)).Delete(id) + }) +} + +// union lays out memory big enough to fit all three forest node types. +// +// Note(jfm): there may be a better way of allocating hetrogeneous data. 
+type union struct { + forest.Reply + forest.Identity + forest.Community +} + +func (u *union) Node() forest.Node { + if len(u.Reply.Identifier) > 0 { + return &u.Reply + } + if len(u.Identity.Identifier) > 0 { + return &u.Identity + } + if len(u.Community.Identifier) > 0 { + return &u.Community + } + return nil +} + +func (u *union) UnmarshalBinary(b []byte) error { + buf := make([]byte, len(b)) + copy(buf, b) + n, err := forest.UnmarshalBinaryNode(buf) + if err != nil { + return err + } + switch n := n.(type) { + case *forest.Reply: + u.Reply = *n + case *forest.Identity: + u.Identity = *n + case *forest.Community: + u.Community = *n + } + return nil +} + +// bucketFromNodeType returns the corresponding bucket for a node type. +func bucketFromNodeType(nt fields.NodeType) Bucket { + switch nt { + case fields.NodeTypeReply: + return BucketReply + case fields.NodeTypeIdentity: + return BucketIdentity + case fields.NodeTypeCommunity: + return BucketCommunity + } + return nil +} + +// bucketFromNode returns the corresponding bucket for a node. +func bucketFromNode(n forest.Node) Bucket { + switch n.(type) { + case *forest.Reply: + return BucketReply + case *forest.Identity: + return BucketIdentity + case *forest.Community: + return BucketCommunity + } + return nil +} + +// Errors wraps multiple errors into a single return value. +type Errors []error + +func (e Errors) Error() string { + return fmt.Sprintf("%v", []error(e)) +} + +// RecentReplies returns up to `q` (quantity) replies older than the timestamp. +func (o *Orchard) RecentReplies( + earliest fields.Timestamp, + q int, +) (replies []forest.Reply, err error) { + return replies, o.DB.View(func(tx *bolt.Tx) error { + c := ReplyCursor{ + Inner: &indexCursor{ + Parent: tx.Bucket(BucketReply), + Index: tx.Bucket(IndexAge).Bucket(BucketReply), + Reverse: true, + }, + Codec: &o.codec, + } + var ii = 0 + for r, err := c.First(); err != io.EOF; r, err = c.Next() { + if earliest > 0 && r.Created < earliest && ii < q { + replies = append(replies, r) + ii++ + } + } + return nil + }) +} + +// RecentIdentities returns up to `q` (quantity) identities older than the timestamp. +func (o *Orchard) RecentIdentities( + earliest fields.Timestamp, + q int, +) (identities []forest.Identity, err error) { + return identities, o.DB.View(func(tx *bolt.Tx) error { + c := IdentityCursor{ + Inner: &indexCursor{ + Parent: tx.Bucket(BucketIdentity), + Index: tx.Bucket(IndexAge).Bucket(BucketIdentity), + Reverse: true, + }, + Codec: &o.codec, + } + var ii = 0 + for r, err := c.First(); err != io.EOF; r, err = c.Next() { + if earliest > 0 && r.Created < earliest && ii < q { + identities = append(identities, r) + ii++ + } + } + return nil + }) +} + +// RecentCommunities returns up to `q` (quantity) communities older than the timestamp. +func (o *Orchard) RecentCommunities( + earliest fields.Timestamp, + q int, +) (communities []forest.Community, err error) { + return communities, o.DB.View(func(tx *bolt.Tx) error { + c := CommunityCursor{ + Inner: &indexCursor{ + Parent: tx.Bucket(BucketCommunity), + Index: tx.Bucket(IndexAge).Bucket(BucketCommunity), + Reverse: true, + }, + Codec: &o.codec, + } + var ii = 0 + for r, err := c.First(); err != io.EOF; r, err = c.Next() { + if earliest > 0 && r.Created < earliest && ii < q { + communities = append(communities, r) + ii++ + } + } + return nil + }) +} + +// RecentFrom queries up to `q` (quantity) nodes of type `nt` that occur after `from`. +// To page through, pass in the next oldest timestamp from the returned nodes. 
+// +// NOTE(jfm): There's semantic edge cases around what it means to pass in timestamp of 0. +// Theoretically, that would mean "iterate from 0 at the earliest", which should always return no +// nodes. +// +// PERF(jfm): Performance analysis pending. +func (o *Orchard) RecentFrom( + nt fields.NodeType, + earliest time.Time, + q int, +) (nodes []forest.Node, err error) { + switch nt { + case fields.NodeTypeReply: + replies, err := o.RecentReplies(fields.TimestampFrom(earliest), q) + if err != nil { + return nil, fmt.Errorf("%s: %w", nt, err) + } + for ii := range replies { + if ii >= q { + break + } + nodes = append(nodes, &replies[ii]) + } + case fields.NodeTypeIdentity: + identities, err := o.RecentIdentities(fields.TimestampFrom(earliest), q) + if err != nil { + return nil, fmt.Errorf("%s: %w", nt, err) + } + for ii := range identities { + if ii >= q { + break + } + nodes = append(nodes, &identities[ii]) + } + case fields.NodeTypeCommunity: + communities, err := o.RecentCommunities(fields.TimestampFrom(earliest), q) + if err != nil { + return nil, fmt.Errorf("%s: %w", nt, err) + } + for ii := range communities { + if ii >= q { + break + } + nodes = append(nodes, &communities[ii]) + } + } + return nodes, nil +} + +// ChildrenBatched traverses the children of a node in fixed-sized batches. +func (o *Orchard) ChildrenBatched( + parent *fields.QualifiedHash, + q, offset int, +) (ch []*fields.QualifiedHash, total int, err error) { + // 1. Seek to the offset. + // 2. Collect up to `q` IDs and return the collected amount. + k, err := o.codec.Encode(parent) + if err != nil { + return nil, 0, fmt.Errorf("serializing parent ID: %w", err) + } + defer func() { + total = len(ch) + }() + return ch, total, o.DB.View(func(tx *bolt.Tx) error { + if child := tx.Bucket(IndexChildren).Get(k); child != nil { + // If offset > 0 but we only have a single child, then it must have already been + // processed. + if offset > 0 { + return nil + } + var id fields.QualifiedHash + if err := o.codec.Decode(child, &id); err != nil { + return fmt.Errorf("deserializing node ID: %w", err) + } + ch = append(ch, &id) + return nil + } + if b := tx.Bucket(IndexChildren).Bucket(k); b != nil { + var ( + c = b.Cursor() + k, v = c.First() + ii int + ) + for jj := 0; jj < offset; jj++ { + k, v = c.Next() + } + if k == nil { + return nil + } + for { + if k == nil || ii >= q { + break + } + var id fields.QualifiedHash + if err := o.codec.Decode(v, &id); err != nil { + return fmt.Errorf("deserializing node ID: %w", err) + } + ch = append(ch, &id) + k, v = c.Next() + ii++ + } + } + return nil + }) +} diff --git a/orchard/orchard_test.go b/orchard/orchard_test.go new file mode 100644 index 0000000..8277fa4 --- /dev/null +++ b/orchard/orchard_test.go @@ -0,0 +1,447 @@ +package orchard + +import ( + "io/ioutil" + "os" + "path/filepath" + "reflect" + "testing" + + "git.sr.ht/~whereswaldon/forest-go" + "git.sr.ht/~whereswaldon/forest-go/fields" + "git.sr.ht/~whereswaldon/forest-go/orchard/internal/codec" + "git.sr.ht/~whereswaldon/forest-go/orchard/internal/mock" +) + +// TestRecentFrom tests that nodes can be queried in batches, starting at a given timestamp. 
+func TestRecentFrom(t *testing.T) { + for _, tt := range []struct { + Label string + Expect string + Type fields.NodeType + Earliest fields.Timestamp + Quantity int + Nodes mock.Nodes + Want []mock.Nodes + }{ + { + Label: "empty store & empty query", + Expect: "no nodes returned", + Type: fields.NodeTypeReply, + Earliest: 0, + Quantity: 0, + Nodes: nil, + Want: nil, + }, + { + Label: "query against empty store", + Expect: "no nodes returned", + Type: fields.NodeTypeReply, + Earliest: 10, + Quantity: 3, + Nodes: nil, + Want: nil, + }, + { + Label: "empty query on populated store", + Expect: "no nodes returned", + Type: fields.NodeTypeReply, + Earliest: 0, + Quantity: 0, + Nodes: []mock.Node{ + { + Type: fields.NodeTypeReply, + ID: "0", + Created: 0, + }, + }, + Want: nil, + }, + { + Label: "query of one & store of one", + Expect: "exactly one node returned", + Type: fields.NodeTypeReply, + Earliest: 2, + Quantity: 1, + Nodes: []mock.Node{ + { + Type: fields.NodeTypeReply, + ID: "1", + Created: 1, + }, + }, + Want: []mock.Nodes{ + []mock.Node{ + { + Type: fields.NodeTypeReply, + ID: "1", + Created: 1, + }, + }, + }, + }, + { + Label: "query larger than store", + Expect: "only valid nodes returned, regardless of query size", + Type: fields.NodeTypeReply, + Earliest: 1, + Quantity: 2, + Nodes: []mock.Node{ + { + Type: fields.NodeTypeReply, + ID: "0", + Created: 0, + }, + }, + Want: []mock.Nodes{ + []mock.Node{{ + Type: fields.NodeTypeReply, + ID: "0", + Created: 0, + }}, + }, + }, + { + Label: "store larger than query", + Expect: "return exactly the number of queried nodes", + Type: fields.NodeTypeReply, + Earliest: 3, + Quantity: 2, + Nodes: []mock.Node{ + { + Type: fields.NodeTypeReply, + ID: "0", + Created: 0, + }, + { + Type: fields.NodeTypeReply, + ID: "1", + Created: 1, + }, + { + Type: fields.NodeTypeReply, + ID: "2", + Created: 2, + }, + }, + Want: []mock.Nodes{ + []mock.Node{ + { + Type: fields.NodeTypeReply, + ID: "2", + Created: 2, + }, + { + Type: fields.NodeTypeReply, + ID: "1", + Created: 1, + }, + }, + []mock.Node{ + { + Type: fields.NodeTypeReply, + ID: "0", + Created: 0, + }, + }, + }, + }, + { + Label: "large enough query, but not enough qualifying nodes", + Expect: "return only the qualifying nodes, regardless of query size", + Type: fields.NodeTypeReply, + Earliest: 1, + Quantity: 3, + Nodes: []mock.Node{ + { + Type: fields.NodeTypeReply, + ID: "0", + Created: 0, + }, + { + Type: fields.NodeTypeReply, + ID: "1", + Created: 1, + }, + { + Type: fields.NodeTypeReply, + ID: "2", + Created: 2, + }, + { + Type: fields.NodeTypeReply, + ID: "3", + Created: 3, + }, + { + Type: fields.NodeTypeReply, + ID: "4", + Created: 4, + }, + { + Type: fields.NodeTypeReply, + ID: "5", + Created: 5, + }, + }, + Want: []mock.Nodes{ + []mock.Node{{ + Type: fields.NodeTypeReply, + ID: "0", + Created: 0, + }}, + }, + }, + } { + t.Run(tt.Label, func(t *testing.T) { + orchard := setup(t, "recent_from", tt.Nodes.Into()...) + var ( + end = tt.Earliest.Time() + got []mock.Nodes + ) + // Query more nodes until there are none left. + for { + nodes, err := orchard.RecentFrom(tt.Type, end, tt.Quantity) + if err != nil { + t.Fatalf("RecentFrom: unexpected error: %+#v", err) + } + if len(nodes) == 0 { + break + } + got = append(got, mock.FromNodes(nodes)) + end = nodes[len(nodes)-1].CreatedAt() + } + // Sanity check the results. 
+ if got, want := len(got), len(tt.Want); got > want { + t.Fatalf("too many pages: want %d, got %d", want, got) + } else if got < want { + t.Fatalf("too few pages: want %d, got %d", want, got) + } + // Compare each page, in order. + for ii, got := range got { + expect(t, got, tt.Want[ii]) + } + }) + } +} + +// TestChildrenBatched tests that child IDs can be queried in batches, starting at a given offset. +func TestChildrenBatched(t *testing.T) { + for _, tt := range []struct { + Label string + Expect string + Root mock.ID + Quantity int + Nodes mock.Nodes + Want []mock.IDs + }{ + // { + // Label: "empty query against empty store", + // Expect: "return nil", + // Quantity: 0, + // Nodes: nil, + // Want: nil, + // }, + // { + // Label: "query against empty store", + // Expect: "return nil", + // Quantity: 10, + // Nodes: nil, + // Want: nil, + // }, + // { + // Label: "empty query against store", + // Expect: "return nil", + // Quantity: 0, + // Nodes: []mock.Node{ + // { + // ID: "0", + // }, + // { + // ID: "1", + // }, + // { + // ID: "2", + // }, + // }, + // Want: nil, + // }, + { + Label: "single child id", + Expect: "return that child id regardless of query size", + Quantity: 3, + Root: "0", + Nodes: []mock.Node{ + { + ID: "0", + }, + { + ID: "1", + Parent: "0", + }, + }, + Want: []mock.IDs{{"1"}}, + }, + { + Label: "exact query against store", + Expect: "return exact child ids", + Quantity: 3, + Root: "0", + Nodes: []mock.Node{ + { + ID: "0", + }, + { + ID: "1", + Parent: "0", + }, + { + ID: "2", + Parent: "0", + }, + { + ID: "3", + Parent: "0", + }, + }, + Want: []mock.IDs{{"1", "2", "3"}}, + }, + { + Label: "query smaller than number of children", + Expect: "page through children in query sized batches", + Quantity: 2, + Root: "0", + Nodes: []mock.Node{ + { + ID: "0", + }, + { + ID: "1", + Parent: "0", + }, + { + ID: "2", + Parent: "0", + }, + { + ID: "3", + Parent: "0", + }, + { + ID: "4", + Parent: "0", + }, + { + ID: "5", + Parent: "0", + }, + }, + Want: []mock.IDs{ + {"1", "2"}, + {"3", "4"}, + {"5"}, + }, + }, + { + Label: "query larger than number of children", + Expect: "single batch contains all children", + Quantity: 3, + Root: "0", + Nodes: []mock.Node{ + { + ID: "0", + }, + { + ID: "1", + Parent: "0", + }, + { + ID: "2", + Parent: "0", + }, + }, + Want: []mock.IDs{ + {"1", "2"}, + }, + }, + } { + t.Run(tt.Label, func(t *testing.T) { + orchard := setup(t, "children_batched", tt.Nodes.Into()...) + var ( + offset = 0 + got []mock.IDs + ) + // Query until there are none left. + for { + ch, total, err := orchard.ChildrenBatched( + tt.Root.IntoQualifiedHash(), + tt.Quantity, + offset, + ) + if err != nil { + t.Fatalf("RecentFrom: unexpected error: %+#v", err) + } + if len(ch) == 0 { + break + } + got = append(got, mock.FromQualifiedHashList(ch)) + offset += total + } + // Sanity check the results. + if got, want := len(got), len(tt.Want); got > want { + t.Fatalf("too many pages: want %d, got %d", want, got) + } else if got < want { + t.Fatalf("too few pages: want %d, got %d", want, got) + } + // Compare each page, in order. + for ii, got := range got { + expect(t, got, tt.Want[ii]) + } + }) + } +} + +// setup creates an orchard using a unique temporary file. +// After the run, database files are moved to a known location with the specified prefix to +// disambiguate between test runs. 
+func setup(t *testing.T, prefix string, nodes ...forest.Node) *Orchard {
+	path, err := ioutil.TempDir("", "orchard.*.test")
+	if err != nil {
+		t.Fatalf("preparing temporary directory: %v", err)
+	}
+	path = filepath.Join(path, "orchard.test.db")
+	o, err := Open(path)
+	if err != nil {
+		t.Fatalf("opening orchard: %v", err)
+	}
+	o.codec.Inner = codec.Unsafe{}
+	for _, n := range nodes {
+		if err := o.Add(n); err != nil {
+			t.Fatalf("adding nodes to store: %v", err)
+		}
+	}
+	t.Cleanup(func() {
+		if err := o.Close(); err != nil {
+			t.Logf("error: closing database file: %v", err)
+		}
+		var (
+			dst = filepath.Join(os.TempDir(), prefix, "orchard.test.db")
+		)
+		if err := os.MkdirAll(filepath.Dir(dst), 0644); err != nil {
+			t.Logf("error: preparing data dir: %v", err)
+		}
+		if err := os.Rename(path, dst); err != nil {
+			t.Logf("error: moving database file: %v", err)
+		}
+		t.Logf("db: %v\n", dst)
+	})
+	return o
+}
+
+func expect(t *testing.T, got, want interface{}) bool {
+	if !reflect.DeepEqual(got, want) {
+		t.Errorf("want != got \nwant: %+#v \n got: %+#v", want, got)
+		return false
+	}
+	return true
+}
diff --git a/serialize/serializer.go b/serialize/serializer.go
index 6b167d6..1921ad8 100644
--- a/serialize/serializer.go
+++ b/serialize/serializer.go
@@ -4,6 +4,7 @@ import (
 	"bytes"
 	"encoding"
 	"fmt"
+	"io"
 	"reflect"
 	"strconv"
 	"strings"
@@ -82,11 +83,11 @@ func ensureIsProgressiveBinaryUnmarshaler(in reflect.Value) bool {
 func ensureSatisfies(field reflect.Value, satisfies satisfyChecker) (reflect.Value, error) {
 	if !satisfies(field) {
 		if !field.CanAddr() {
-			return field, fmt.Errorf("Value does not implement encoding.BinaryMarshaler, and cannot take address")
+			return field, fmt.Errorf("Value does not implement expected interface, and cannot take address")
 		}
 		// see whether a pointer to the field satisfies the interface
 		if !satisfies(field.Addr()) {
-			return field, fmt.Errorf("Neither value not pointer to value implement encoding.BinaryMarshaler")
+			return field, fmt.Errorf("Neither value nor pointer to value implements expected interface")
 		}
 		field = field.Addr()
 	}
@@ -125,15 +126,23 @@ func getEntry(field reflect.Value, tag string) (*serialEntry, error) {
 	return entry, nil
 }
+// dereference recursively dereferences pointers until it encounters a value
+// that is not of Kind reflect.Ptr, which it returns.
+func dereference(value reflect.Value) reflect.Value {
+	if value.Kind() == reflect.Ptr {
+		return dereference(value.Elem())
+	}
+	return value
+}
+
 // convert the given reflect.Value (of a struct) into a slice of serialEntry
 // structs describing how to {de,serialize} its fields. The interfaceTest function is
 // used to ensure that the reflect.Value elements satisfy a specific interface.
 func getSerializationFields(value reflect.Value) ([]*serialEntry, error) {
 	const arborTag = "arbor"
 	// dereference a pointer if we've been given one
-	if value.Kind() == reflect.Ptr {
-		value = value.Elem()
-	}
+	value = dereference(value)
+
 	// ensure input is a struct
 	if value.Kind() != reflect.Struct {
 		return nil, fmt.Errorf("expected a struct, got Kind %d", value.Kind())
@@ -197,27 +206,58 @@ func ArborSerializeConfig(value reflect.Value, config SerializationConfig) ([]by
 			}
 			continue
 		}
-		// ensure supports Marshaling
-		field.value, err = ensureSatisfies(field.value, ensureIsEncodingBinaryMarshaler)
-		if err != nil {
-			return nil, err
-		}
-		marshaler, ok := field.value.Interface().(encoding.BinaryMarshaler)
-		if !ok {
-			return nil, fmt.Errorf("Tagged non-recursive field does not implement encoding.BinaryMarshaler")
-		}
-		data, err := marshaler.MarshalBinary()
-		if err != nil {
-			return nil, err
-		}
-		_, err = serialized.Write(data)
-		if err != nil {
+		if err := attemptBinaryMarshal(&serialized, field.value); err != nil {
 			return nil, err
 		}
 	}
 	return serialized.Bytes(), nil
 }
+
+// attemptBinaryMarshal attempts to marshal the given value into w
+// using the standard encoding.BinaryMarshaler interface.
+func attemptBinaryMarshal(w io.Writer, value reflect.Value) error {
+	// ensure supports Marshaling
+	value, err := ensureSatisfies(value, ensureIsEncodingBinaryMarshaler)
+	if err != nil {
+		return err
+	}
+	marshaler, ok := value.Interface().(encoding.BinaryMarshaler)
+	if !ok {
+		return fmt.Errorf("Tagged non-recursive field does not implement encoding.BinaryMarshaler")
+	}
+	data, err := marshaler.MarshalBinary()
+	if err != nil {
+		return err
+	}
+	_, err = w.Write(data)
+	if err != nil {
+		return err
+	}
+	return nil
+}
+
+// attemptBinaryUnmarshal unmarshals bytes from data into the provided value
+// if it supports an interface for doing so.
+func attemptBinaryUnmarshal(data []byte, value reflect.Value) (unused []byte, err error) {
+	value, err = ensureSatisfies(value, ensureIsProgressiveBinaryUnmarshaler)
+	if err != nil {
+		return nil, err
+	}
+	unmarshaler, ok := value.Interface().(ProgressiveBinaryUnmarshaler)
+	if !ok {
+		return nil, fmt.Errorf("Tagged non-recursive field does not implement ProgressiveBinaryUnmarshaler")
+	}
+	err = unmarshaler.UnmarshalBinary(data)
+	if err != nil {
+		return nil, err
+	}
+	bytesConsumed := unmarshaler.BytesConsumed()
+	if bytesConsumed > len(data) {
+		return nil, fmt.Errorf("field %v.BytesConsumed() returned %d, but only %d bytes in slice", value, bytesConsumed, len(data))
+	}
+	return data[bytesConsumed:], nil
+}
+
 // ArborDeserialize unpacks the given bytes into the given reflect.Value
 // (corresponding to a struct). It returns any bytes that were not needed
 // to deserialize the struct.
@@ -238,24 +278,10 @@ func ArborDeserialize(value reflect.Value, data []byte) (unused []byte, err erro
 			}
 			continue
 		}
-		// ensure supports Unmarshaling
-		field.value, err = ensureSatisfies(field.value, ensureIsProgressiveBinaryUnmarshaler)
+		data, err = attemptBinaryUnmarshal(data, field.value)
 		if err != nil {
 			return nil, err
 		}
-		unmarshaler, ok := field.value.Interface().(ProgressiveBinaryUnmarshaler)
-		if !ok {
-			return nil, fmt.Errorf("Tagged non-recursive field does not implement ProgressiveBinaryUnmarshaler")
-		}
-		err := unmarshaler.UnmarshalBinary(data)
-		if err != nil {
-			return nil, err
-		}
-		bytesConsumed := unmarshaler.BytesConsumed()
-		if bytesConsumed > len(data) {
-			return nil, fmt.Errorf("field %v.BytesConsumed() returned %d, but only %d bytes in slice", field.value, bytesConsumed, len(data))
-		}
-		data = data[bytesConsumed:]
 	}
 	return data, nil
 }
diff --git a/signer_test.go b/signer_test.go
index c3d717a..04cd542 100644
--- a/signer_test.go
+++ b/signer_test.go
@@ -56,7 +56,15 @@ func getGPGSignerOrFail(t *testing.T) (forest.Signer, func()) {
 	}
 	cleanup := func() { os.RemoveAll(tempdir) }
-	gpg2 := exec.Command(gpgExec, "--yes", "--batch", "--pinentry-mode", "loopback", "--import", tempkey.Name())
+	gpg2 := exec.Command(
+		gpgExec,
+		"--yes",
+		"--batch",
+		"--pinentry-mode",
+		"loopback",
+		"--import",
+		tempkey.Name(),
+	)
 	gpg2.Env = []string{"GNUPGHOME=" + tempdir}
 	stderr, _ := gpg2.StderrPipe()
 	if err := gpg2.Run(); err != nil {
@@ -72,7 +80,17 @@ func getGPGSignerOrFail(t *testing.T) (forest.Signer, func()) {
 		cleanup()
 	}
 	signer.Rewriter = func(gpg2 *exec.Cmd) error {
-		gpg2.Args = append(append(gpg2.Args[:1], "--yes", "--batch", "--pinentry-mode", "loopback", "--passphrase", testkeys.TestKeyPassphrase), gpg2.Args[1:]...)
+		gpg2.Args = append(
+			append(
+				gpg2.Args[:1],
+				"--yes",
+				"--batch",
+				"--pinentry-mode",
+				"loopback",
+				"--passphrase",
+				testkeys.TestKeyPassphrase,
+			),
+			gpg2.Args[1:]...)
 		gpg2.Env = []string{"GNUPGHOME=" + tempdir}
 		gpg2.Stderr = os.Stderr
 		return nil
diff --git a/store.go b/store.go
index 82a206e..3282434 100644
--- a/store.go
+++ b/store.go
@@ -1,13 +1,13 @@
 package forest
 
 import (
+	"time"
+
 	"git.sr.ht/~whereswaldon/forest-go/fields"
 )
 
 // Store describes a collection of `forest.Node`.
 type Store interface {
-	// CopyInto allows transformations between Store instances.
-	CopyInto(Store) error
 	// Get retrieves a node by ID.
 	Get(*fields.QualifiedHash) (Node, bool, error)
 	// GetIdentity retrieves an identity node by ID.
@@ -30,3 +30,23 @@ type Store interface {
 	// RemoveSubtree from the store.
 	RemoveSubtree(*fields.QualifiedHash) error
 }
+
+// Copiable stores can copy themselves into another store.
+type Copiable interface {
+	CopyInto(Store) error
+}
+
+// Paginated stores can page through nodes with a series of queries.
+type Paginated interface {
+	// ChildrenBatched allows traversing the children of the node in fixed-size batches. The
+	// children should be paged through in ascending order of CreatedAt timestamp.
+	ChildrenBatched(
+		root *fields.QualifiedHash,
+		quantity, offset int,
+	) (batch []*fields.QualifiedHash, total int, err error)
+	// RecentFrom allows paging through the nodes relative to a specific time.
+	// Each call advances from the previous position until there are no more nodes.
+	// The page size is specified by the quantity parameter.
+	// Earliest time specifies where to iterate from.
+	RecentFrom(nt fields.NodeType, earliest time.Time, quantity int) ([]Node, error)
+}
diff --git a/store/archive.go b/store/archive.go
index e258a8f..5c0616a 100644
--- a/store/archive.go
+++ b/store/archive.go
@@ -120,7 +120,9 @@ func (m *Archive) unsubscribeInMap(targetMap map[Subscription]func(forest.Node),
 
 func (m *Archive) CopyInto(s forest.Store) (err error) {
 	m.executeAsync(func() {
-		err = m.store.CopyInto(s)
+		if c, ok := m.store.(forest.Copiable); ok {
+			err = c.CopyInto(s)
+		}
 	})
 	return
 }
@@ -131,7 +133,6 @@ func (m *Archive) Get(id *fields.QualifiedHash) (node forest.Node, present bool,
 	})
 	return
 }
-
 func (m *Archive) GetIdentity(id *fields.QualifiedHash) (node forest.Node, present bool, err error) {
 	m.executeAsync(func() {
 		node, present, err = m.store.GetIdentity(id)
diff --git a/store/cache-store.go b/store/cache-store.go
index bd9f8d8..1db0090 100644
--- a/store/cache-store.go
+++ b/store/cache-store.go
@@ -26,8 +26,10 @@ var _ forest.Store = &CacheStore{}
 // fast in-memory implementations as the `cache` layer and disk or
 // network-based implementations as the `base` layer.
 func NewCacheStore(cache, back forest.Store) (*CacheStore, error) {
-	if err := cache.CopyInto(back); err != nil {
-		return nil, err
+	if c, ok := cache.(forest.Copiable); ok {
+		if err := c.CopyInto(back); err != nil {
+			return nil, err
+		}
 	}
 	return &CacheStore{cache, back}, nil
 }
@@ -40,7 +42,10 @@ func (m *CacheStore) Get(id *fields.QualifiedHash) (forest.Node, bool, error) {
 }
 
 func (m *CacheStore) CopyInto(other forest.Store) error {
-	return m.Back.CopyInto(other)
+	if c, ok := m.Back.(forest.Copiable); ok {
+		return c.CopyInto(other)
+	}
+	return fmt.Errorf("cannot copy store: %T does not implement forest.Copiable", m.Back)
 }
 
 // Add inserts the given node into both stores of the CacheStore
diff --git a/store/store_test.go b/store/store_test.go
index 3d9914f..e6796b9 100644
--- a/store/store_test.go
+++ b/store/store_test.go
@@ -1,10 +1,21 @@
 package store_test
 
 import (
+	"errors"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"math/rand"
+	"os"
+	"path/filepath"
+	"reflect"
 	"testing"
+	"time"
 
 	forest "git.sr.ht/~whereswaldon/forest-go"
 	"git.sr.ht/~whereswaldon/forest-go/fields"
+	"git.sr.ht/~whereswaldon/forest-go/grove"
+	"git.sr.ht/~whereswaldon/forest-go/orchard"
 	"git.sr.ht/~whereswaldon/forest-go/store"
 	"git.sr.ht/~whereswaldon/forest-go/testutil"
 )
@@ -14,6 +25,127 @@ func TestMemoryStore(t *testing.T) {
 	testStandardStoreInterface(t, s, "MemoryStore")
 }
 
+func TestGroveStore(t *testing.T) {
+	path := filepath.Join(os.TempDir(), "grove")
+	if err := func() error {
+		if err := os.RemoveAll(path); err != nil {
+			return err
+		}
+		if err := os.MkdirAll(path, 0777); err != nil {
+			return err
+		}
+		return nil
+	}(); err != nil {
+		t.Skipf("preparing filesystem: %v", err)
+	}
+	g, err := grove.New(path)
+	if err != nil {
+		t.Skipf("preparing filesystem: %v", err)
+	}
+	testStandardStoreInterface(t, g, "Grove")
+}
+
+// TestOrchardConcrete tests the concrete Orchard type.
+func TestOrchardConcrete(t *testing.T) {
+	o, g := setup(t, "TestOrchardConcrete")
+	testOrchard(t, o, g, "Orchard")
+}
+
+// TestOrchardStore exercises the Orchard against the Store interface.
+func TestOrchardStore(t *testing.T) {
+	o, _ := setup(t, "TestOrchardStore")
+	testStandardStoreInterface(t, o, "Orchard")
+}
+
+// testOrchard tests against a mock store pre-populated with nodes.
+// The nodes must be copied over, be retrievable, and queried correctly.
+// Memory store is used as a convenience to iterate over all nodes in memory.
+func testOrchard(t *testing.T, s, mock forest.Store, prefix string) {
+	var mem = store.NewMemoryStore()
+	copier, ok := mock.(forest.Copiable)
+	if !ok {
+		t.Fatalf("mocked store must implement forest.Copiable")
+	}
+	if err := copier.CopyInto(s); err != nil {
+		t.Fatalf("populating store with mock nodes: %v", err)
+	}
+	if err := copier.CopyInto(mem); err != nil {
+		t.Fatalf("populating mem store with mock nodes: %v", err)
+	}
+	for _, n := range mem.Items {
+		got, ok, err := s.Get(n.ID())
+		if err != nil {
+			t.Errorf("%s: getting node from store: %v", prefix, err)
+		}
+		if !ok {
+			t.Errorf("%s: node not gettable from store: %T %s", prefix, n, n.ID())
+		}
+		if ok && got == nil {
+			t.Errorf("%s: expected non-nil node: %v", prefix, got)
+		}
+		if got != nil && !got.Equals(n) {
+			t.Errorf("%s: got node not equal to original node", prefix)
+		}
+	}
+	for _, parent := range mem.Items {
+		got, err := s.Children(parent.ID())
+		if err != nil {
+			t.Errorf("%s: retrieving children: %v", prefix, err)
+		}
+		want, err := mem.Children(parent.ID())
+		if err != nil {
+			t.Errorf("%s: computing children: %v", prefix, err)
+		}
+		for _, w := range want {
+			if !IDList(got).Contains(w) {
+				n, _, err := mem.Get(w)
+				if err != nil {
+					t.Errorf("%s: getting missing node: %v", prefix, err)
+				} else {
+					t.Errorf("%s: missing: %T %s of parent %T %s", prefix, n, n.ID(), parent, parent.ID())
+				}
+			}
+		}
+		for _, g := range got {
+			if !IDList(want).Contains(g) {
+				n, _, err := s.Get(g)
+				if err != nil {
+					t.Errorf("%s: getting extraneous node: %v", prefix, err)
+				} else {
+					t.Errorf("%s: extraneous: %T %s to parent %T %s", prefix, n, n.ID(), parent, parent.ID())
+				}
+			}
+		}
+	}
+	for _, nt := range []fields.NodeType{
+		fields.NodeTypeReply,
+		fields.NodeTypeIdentity,
+		fields.NodeTypeCommunity,
+	} {
+		const maxquery = 1000
+		for ii := 0; ii < maxquery; ii += (ii + 1) {
+			query := ii
+			got, err := s.Recent(nt, query)
+			if err != nil {
+				t.Fatalf("%s: unexpected error getting recent nodes: %v", prefix, err)
+			}
+			if !isHomogeneous(nt, got) {
+				t.Fatalf("%s Recent(%s, %d) has heterogeneous nodes", prefix, nt, query)
+			}
+			want, err := mock.Recent(nt, query)
+			if err != nil {
+				t.Fatalf("%s: unexpected error getting recent nodes: %v", prefix, err)
+			}
+			if !isHomogeneous(nt, want) {
+				t.Fatalf("%s Recent(%s, %d) has heterogeneous nodes", prefix, nt, query)
+			}
+			if !reflect.DeepEqual(infos(want), infos(got)) {
+				t.Fatalf("%s: failed query Recent(%s, %d)", prefix, nt, query)
+			}
+		}
+	}
+}
+
 func testStandardStoreInterface(t *testing.T, s forest.Store, storeImplName string) {
 	// create three test nodes, one of each type
 	identity, _, community, reply := testutil.MakeReplyOrSkip(t)
@@ -40,7 +172,7 @@ func testStandardStoreInterface(t *testing.T, s forest.Store, storeImplName stri
 			} else if err != nil {
 				t.Errorf("Empty %s Get() should not err with %s", storeImplName, err)
 			} else if node != nil {
-				t.Errorf("Empty %s Get() should return none-nil node %v", storeImplName, node)
+				t.Errorf("Empty %s Get() should return nil node, got %s", storeImplName, node)
 			}
 		}
 	}
@@ -60,6 +192,9 @@ func testStandardStoreInterface(t *testing.T, s forest.Store, storeImplName stri
 		{identity, []string{"get", "identity"}},
 		{community, []string{"get", "community"}},
 		{reply, []string{"get", "conversation", "reply"}},
+		{identity, []string{"get"}},
+		{community, []string{"get"}},
+		{reply, []string{"get"}},
 	}
 
 	// ensure all getters work for each node
@@ -79,21 +214,26 @@ func testStandardStoreInterface(t *testing.T, s forest.Store, storeImplName stri
 	// map nodes to the children that they ought to have within the store
 	nodesToChildren := []struct {
 		forest.Node
-		children []*fields.QualifiedHash
+		children []forest.Node
 	}{
-		{identity, []*fields.QualifiedHash{}},
-		{community, []*fields.QualifiedHash{reply.ID()}},
-		{reply, []*fields.QualifiedHash{}},
+		{identity, []forest.Node{}},
+		{community, []forest.Node{reply}},
+		{reply, []forest.Node{}},
 	}
 
 	// check each node has its proper children
 	for _, childConfig := range nodesToChildren {
 		if children, err := s.Children(childConfig.ID()); err != nil {
-			t.Errorf("%s should not error fetching children of %v", storeImplName, childConfig.ID())
+			t.Errorf(
+				"%s should not error fetching children of %v: %q",
+				storeImplName,
+				childConfig.ID().Blob,
+				err,
+			)
 		} else {
 			for _, child := range childConfig.children {
-				if !containsID(children, child) {
-					t.Errorf("%s should have %v as a child of %v", storeImplName, child, childConfig.ID())
+				if !containsID(children, child.ID()) {
+					t.Errorf("%s should have %T %v as a child of %T %v (got %v)", storeImplName, child, child.ID(), childConfig.Node, childConfig.ID(), children)
 				}
 			}
 		}
@@ -122,7 +262,7 @@ func testStandardStoreInterface(t *testing.T, s forest.Store, storeImplName stri
 		if err != nil {
 			t.Errorf("Recent failed on valid input: %v", err)
 		} else if len(recentNodes) < 1 {
-			t.Errorf("Recent on store with data returned too few results")
+			t.Errorf("Recent on store with data returned too few results: want %d, got %d", 1, len(recentNodes))
 		} else if !recentNodes[0].Equals(run.atZero) {
 			t.Errorf("Expected most recent node to be the newly created one")
 		}
 		if err != nil {
 			t.Errorf("Recent failed on valid input: %v", err)
 		} else if len(recentNodes) < 2 {
-			t.Errorf("Recent on store with data returned too few results")
+			t.Errorf("Recent on store with data returned too few results: want %d, got %d", 2, len(recentNodes))
 		} else if !recentNodes[0].Equals(run.atZero) {
 			t.Errorf("Expected most recent node to be the newly created one")
 		} else if !recentNodes[1].Equals(run.atOne) {
@@ -140,18 +280,9 @@ func testStandardStoreInterface(t *testing.T, s forest.Store, storeImplName stri
 		if err != nil {
 			t.Errorf("Recent failed on valid input: %v", err)
 		} else if len(recentNodes) > 2 {
-			t.Errorf("Recent on store with only two matching nodes returned more than 2 results")
-		}
-	}
-}
-
-func containsID(ids []*fields.QualifiedHash, id *fields.QualifiedHash) bool {
-	for _, current := range ids {
-		if current.Equals(id) {
-			return true
+			t.Errorf("Recent (%v) on store with only two matching nodes returned more than 2 results: %#v", run.NodeType, recentNodes)
 		}
 	}
-	return false
 }
 
 func TestCacheStore(t *testing.T) {
@@ -259,3 +390,380 @@ func TestArchiveDescendantsOf(t *testing.T) {
 	try(t, community.ID(), 1)
 	try(t, identity.ID(), 0)
 }
+
+func BenchmarkOrchard(b *testing.B) {
+	benchmarkStore(b, loadStore(useOrchard))
+}
+
+func BenchmarkGrove(b *testing.B) {
+	benchmarkStore(b, loadStore(useGrove))
+}
+
+func BenchmarkMemory(b *testing.B) {
+	benchmarkStore(b, loadStore(useMemory))
+}
+
+func benchmarkStore(b *testing.B, load Load) {
+	benches := []struct {
+		Name string
+		Size Size
+	}{
+		{"nano", Size{Identities: 5, Replies: 10}},
+		{"micro", Size{Identities: 5, Replies: 20}},
+		{"small", Size{Identities: 10, Replies: 40}},
+		{"nominal", Size{Identities: 20, Replies: 80}},
+		{"medium", Size{Identities: 40, Replies: 160}},
+		{"large", Size{Identities: 80, Replies: 320}},
{"uber", Size{Identities: 160, Replies: 640}}, + } + for _, bench := range benches { + s, err := load(bench.Size) + if err != nil { + b.Skipf("loading store: %v", err) + } + b.Run(fmt.Sprintf("Recent/%s", bench.Name), func(b *testing.B) { + var ( + recent []forest.Node + err error + ) + b.ReportAllocs() + b.ResetTimer() + for ii := 0; ii < b.N; ii++ { + recent, err = s.Recent(fields.NodeTypeReply, 100) + if err != nil { + b.Errorf("loading %d recent nodes: %v", 100, err) + } + _ = recent + _ = err + } + }) + b.Run(fmt.Sprintf("Get/%s", bench.Name), func(b *testing.B) { + var ( + n forest.Node + ok bool + err error + ) + nodes, err := s.Recent(fields.NodeTypeReply, 100) + if err != nil { + b.Skipf("loading recent replies: %v", err) + } + b.ReportAllocs() + b.ResetTimer() + for ii := 0; ii < b.N; ii++ { + n, ok, err = s.Get(nodes[rand.Intn(len(nodes)-1)].ID()) + if err != nil { + b.Errorf("loading %d recent nodes: %v", 100, err) + } + _ = n + _ = ok + _ = err + } + }) + b.Run(fmt.Sprintf("Children/%s", bench.Name), func(b *testing.B) { + var ( + children []*fields.QualifiedHash + err error + ) + nodes, err := s.Recent(fields.NodeTypeReply, 100) + if err != nil { + b.Skipf("loading recent replies: %v", err) + } + b.ReportAllocs() + b.ResetTimer() + for ii := 0; ii < b.N; ii++ { + children, err = s.Children(nodes[rand.Intn(len(nodes)-1)].ID()) + if err != nil { + b.Errorf("loading %d child nodes: %v", 100, err) + } + _ = children + _ = err + } + }) + b.Run(fmt.Sprintf("LeavesOf/%s", bench.Name), func(b *testing.B) { + var ( + leaves []*fields.QualifiedHash + err error + archive = store.NewArchive(s) + ) + nodes, err := archive.Recent(fields.NodeTypeReply, 100) + if err != nil { + b.Skipf("loading recent replies: %v", err) + } + b.ReportAllocs() + b.ResetTimer() + for ii := 0; ii < b.N; ii++ { + leaves, err = archive.LeavesOf(nodes[rand.Intn(len(nodes)-1)].ID()) + if err != nil { + b.Errorf("loading %d leaf nodes: %v", 100, err) + } + _ = leaves + _ = err + } + }) + if closer, ok := s.(interface{ Close() error }); ok { + if err := closer.Close(); err != nil { + b.Logf("closing: %v", err) + } + } + } +} + +// GenerateCommunity creates a community with nIdentities and nReplies per +// identity and writes the nodes to the provided store. +func GenerateCommunity(nIdentities, nReplies int, s forest.Store) { + var ( + community = testutil.MockCommunity(forest.As(testutil.MockIdentity()), "test-community") + conversations []*forest.Reply + builders []*forest.Builder + ) + if err := s.Add(community); err != nil { + panic(fmt.Errorf("adding node to memory store: %w", err)) + } + for ii := 0; ii < nIdentities; ii++ { + b := forest.As(testutil.MockIdentity()) + // Random timestamp up to the max value of an int. + b.Timer = func() time.Time { + return fields.Timestamp(rand.Intn(int(^uint(0) >> 1))).Time() + } + builders = append(builders, b) + if err := s.Add(b.User); err != nil { + panic(fmt.Errorf("adding node to memory store: %w", err)) + } + reply := testutil.MockReply(b, community) + if err := s.Add(reply); err != nil { + panic(fmt.Errorf("adding node to memory store: %w", err)) + } + conversations = append(conversations, reply) + } + // Reply to conversations up to nReplies per identity. 
+	for _, b := range builders {
+		for ii := 0; ii < nReplies; ii++ {
+			if err := s.Add(testutil.MockReply(b, conversations[rand.Intn(len(conversations))])); err != nil {
+				panic(fmt.Errorf("adding node to memory store: %w", err))
+			}
+		}
+	}
+}
+
+// info is a helper that prints without noise in test failure output.
+type info struct {
+	ID      string
+	Type    string
+	Created fields.Timestamp
+}
+
+func infos(nodes []forest.Node) (idlist []info) {
+	for _, n := range nodes {
+		var t fields.NodeType
+		switch n := n.(type) {
+		case *forest.Reply:
+			t = n.Type
+		case *forest.Identity:
+			t = n.Type
+		case *forest.Community:
+			t = n.Type
+		}
+		idlist = append(idlist, info{
+			ID:      n.ID().String(),
+			Type:    t.String(),
+			Created: fields.TimestampFrom(n.CreatedAt()),
+		})
+	}
+	return idlist
+}
+
+func isHomogeneous(nt fields.NodeType, nodes []forest.Node) bool {
+	for _, n := range nodes {
+		switch n := n.(type) {
+		case *forest.Reply:
+			if n.Type != nt {
+				return false
+			}
+		case *forest.Identity:
+			if n.Type != nt {
+				return false
+			}
+		case *forest.Community:
+			if n.Type != nt {
+				return false
+			}
+		}
+	}
+	return true
+}
+
+func containsID(ids []*fields.QualifiedHash, id *fields.QualifiedHash) bool {
+	for _, current := range ids {
+		if current.Equals(id) {
+			return true
+		}
+	}
+	return false
+}
+
+// Size specifies the size of a community in terms of identities and replies per
+// identity.
+// Functions as a key for loading a store.
+type Size struct {
+	Identities int
+	Replies    int
+}
+
+// Load can load a store.
+type Load = func(sz Size) (forest.Store, error)
+
+// Init will return a store for the given key.
+// Bool indicates whether the store has been initialised with a community.
+type Init = func(key string) (s forest.Store, ok bool, err error)
+
+// loadStore loads a store using the initializer function.
+//
+// This indirection allows us to implement lazy loading of stores, where the
+// community will be generated if it doesn't exist, and re-used if it does.
+//
+// This is because generating communities is expensive due to the cryptography
+// involved.
+func loadStore(init Init) Load {
+	return func(sz Size) (forest.Store, error) {
+		s, ok, err := init(fmt.Sprintf("%d_%d_", sz.Identities, sz.Replies))
+		if err != nil {
+			return nil, err
+		}
+		if !ok {
+			GenerateCommunity(sz.Identities, sz.Replies, s)
+		}
+		return s, nil
+	}
+}
+
+// useOrchard will return or create an orchard with the given prefix.
+func useOrchard(prefix string) (s forest.Store, exists bool, err error) {
+	path := filepath.Join(os.TempDir(), "benchmark_cache", fmt.Sprintf("%sorchard.db", prefix))
+	if _, err := os.Stat(path); err == nil {
+		exists = true
+	}
+	o, err := orchard.Open(path)
+	if err != nil {
+		return nil, false, err
+	}
+	return o, exists, nil
+}
+
+// useGrove will return or create a grove with the given prefix.
+func useGrove(prefix string) (s forest.Store, exists bool, err error) {
+	path := filepath.Join(os.TempDir(), "benchmark_cache", fmt.Sprintf("%sgrove", prefix))
+	if _, err := os.Stat(path); err == nil {
+		exists = true
+	}
+	if err := os.MkdirAll(path, 0777); err != nil && !errors.Is(err, os.ErrExist) {
+		return nil, exists, err
+	}
+	g, err := grove.New(path)
+	if err != nil {
+		return nil, false, err
+	}
+	return g, exists, nil
+}
+
+// useMemory will return an empty memory store.
+func useMemory(prefix string) (s forest.Store, exists bool, err error) {
+	return store.NewMemoryStore(), false, nil
+}
+
+// newGrove returns a Grove rooted at path, generating it if it doesn't exist.
+func newGrove(path string) (g *grove.Grove, err error) {
+	// If grove does not exist, generate it.
+	if _, err := os.Stat(path); os.IsNotExist(err) {
+		defer GenerateCommunity(10, 10, g)
+	}
+	return grove.New(path)
+}
+
+// copyFile copies src to dst.
+func copyFile(src, dst string) error {
+	srcf, err := os.OpenFile(src, os.O_RDONLY, 0644)
+	if err != nil {
+		return fmt.Errorf("opening source file: %w", err)
+	}
+	defer srcf.Close()
+	dstf, err := os.OpenFile(dst, os.O_CREATE|os.O_RDWR, 0644)
+	if err != nil {
+		return fmt.Errorf("opening destination file: %w", err)
+	}
+	defer dstf.Close()
+	if _, err := io.Copy(dstf, srcf); err != nil {
+		return fmt.Errorf("copying data: %w", err)
+	}
+	return nil
+}
+
+type IDList []*fields.QualifiedHash
+
+// Contains reports whether id is contained in the list.
+func (ids IDList) Contains(id *fields.QualifiedHash) bool {
+	for ii := range ids {
+		if ids[ii].Equals(id) {
+			return true
+		}
+	}
+	return false
+}
+
+// setup initializes an Orchard store and Grove store.
+// The Grove is used as the reference implementation, which the Orchard is tested against.
+// After a test run, the 'orchard.db' file is moved to a known location for inspection.
+func setup(t *testing.T, prefix string) (*orchard.Orchard, *grove.Grove) {
+	tmp, err := ioutil.TempDir("forest_go_test", "")
+	if err != nil {
+		t.Skipf("error creating testing directory: %v", err)
+	}
+	var (
+		orchardPath = filepath.Join(tmp, "orchard_test.db")
+		grovePath   = filepath.Join(tmp, "grove")
+	)
+	if err := os.MkdirAll(filepath.Dir(orchardPath), 0644); err != nil {
+		t.Skipf("preparing orchard file: %v", err)
+	}
+	if err := os.MkdirAll(grovePath, 0644); err != nil {
+		t.Skipf("preparing grove directory: %v", err)
+	}
+	s, err := orchard.Open(orchardPath)
+	if err != nil {
+		t.Skipf("opening database: %v", err)
+	}
+	g, err := newGrove(grovePath)
+	if err != nil {
+		t.Skipf("initializing grove: %v", err)
+	}
+	t.Cleanup(func() {
+		if err := s.Close(); err != nil {
+			t.Logf("closing orchard: %v", err)
+		}
+		var (
+			target = filepath.Join(os.TempDir(), "forest_go_test", prefix)
+			name   = "orchard.db"
+		)
+		if err := func() error {
+			if err := os.RemoveAll(target); err != nil {
+				return fmt.Errorf("cleaning target path: %w", err)
+			}
+			if err := os.RemoveAll(grovePath); err != nil {
+				return fmt.Errorf("cleaning Grove: %w", err)
+			}
+			if err := os.RemoveAll(target); err != nil {
+				return fmt.Errorf("cleaning target path: %w", err)
+			}
+			if err := os.MkdirAll(target, 0644); err != nil {
+				return fmt.Errorf("creating target path: %w", err)
+			}
+			if err := copyFile(orchardPath, filepath.Join(target, name)); err != nil {
+				return fmt.Errorf("moving file: %w", err)
+			}
+			return nil
+		}(); err != nil {
+			t.Logf("preparing to move Orchard file to known location: %v", err)
+		}
+		t.Logf("db: %v", filepath.Join(target, name))
+	})
+	return s, g
+}
diff --git a/testkeys/testkeys.go b/testkeys/testkeys.go
index f2be17c..e649014 100644
--- a/testkeys/testkeys.go
+++ b/testkeys/testkeys.go
@@ -6,6 +6,7 @@ package testkeys
 
 import (
 	"bytes"
+	"fmt"
 	"testing"
 
 	"git.sr.ht/~whereswaldon/forest-go"
@@ -43,6 +44,20 @@ func Signer(t *testing.T, privKey string) forest.Signer {
 	return signer
 }
 
+// MockSigner creates a fake signer for use in testing.
+// Panics on error.
+func MockSigner(privKey string) forest.Signer {
+	privkey, err := getKey(privKey, TestKeyPassphrase)
+	if err != nil {
+		panic(fmt.Errorf("creating private key: %v", err))
+	}
+	signer, err := forest.NewNativeSigner(privkey)
+	if err != nil {
+		panic(fmt.Errorf("creating signer: %v", err))
+	}
+	return signer
+}
+
 const TestKeyPassphrase = "pleasedonotusethisasapassword"
 
 const PrivKey1 = `-----BEGIN PGP PRIVATE KEY BLOCK-----
diff --git a/testutil/node_utils.go b/testutil/node_utils.go
index c6f1eab..80f6d24 100644
--- a/testutil/node_utils.go
+++ b/testutil/node_utils.go
@@ -5,6 +5,7 @@ content.
 package testutil
 
 import (
+	"fmt"
 	"testing"
 
 	"git.sr.ht/~whereswaldon/forest-go"
@@ -64,3 +65,36 @@ func RandomNodeSlice(length int, t *testing.T) ([]*fields.QualifiedHash, []fores
 	}
 	return ids, nodes
 }
+
+// MockIdentity creates a fake identity with a random name for the purposes of
+// testing.
+func MockIdentity() (*forest.Identity, forest.Signer) {
+	signer := testkeys.MockSigner(testkeys.PrivKey1)
+	identity, err := forest.NewIdentity(signer, RandomString(12), []byte{})
+	if err != nil {
+		panic(fmt.Errorf("mocking up identity: %v", err))
+	}
+	return identity, signer
+}
+
+// MockCommunity creates a fake community for the purposes of testing.
+func MockCommunity(identity *forest.Builder, name string) *forest.Community {
+	c, err := identity.NewCommunity(name, []byte{})
+	if err != nil {
+		panic(fmt.Errorf("mocking up community: %v", err))
+	}
+	return c
+}
+
+// MockReply creates a fake reply with random content for the given identity
+// and parent node.
+func MockReply(
+	identity *forest.Builder,
+	parent forest.Node,
+) *forest.Reply {
+	r, err := identity.NewReply(parent, RandomString(128), []byte{})
+	if err != nil {
+		panic(fmt.Errorf("mocking up reply: %v", err))
+	}
+	return r
+}
-- 
2.30.0.windows.1