~emersion/public-inbox

Simon Ser: 1
 Add Decoder

 3 files changed, 498 insertions(+), 0 deletions(-)
Some other nice-to-haves which can be added in the future:

- Support for arrays (implemented in a branch).
- Allow specifying the "param" struct tag option multiple times, decoding
  parameters in order.
- Add a scfg.DirectiveUnmarshaler interface for custom unmarshalling logic.
- Support encoding.TextUnmarshaler.
- Support scfg.Directive when used as a type to unmarshal to.
Export patchset (mbox)
How do I use this?

Copy & paste the following snippet into your terminal to import this patchset into git:

curl -s https://lists.sr.ht/~emersion/public-inbox/patches/42271/mbox | git am -3
Learn more about email & git

[PATCH go-scfg v2] Add Decoder Export this patch

From: delthas <delthas@dille.cc>

This adds unmarshaling support to go-scfg, inspired by the Go json
library.

Co-authored-by: Simon Ser <contact@emersion.fr>
---

I've rewritten this mostly from scratch, removing some of the
complexity, making the decoder a lot more restrictive, and adding
the "param" struct tag option.

 struct.go         |  74 ++++++++++++++
 unmarshal.go      | 247 ++++++++++++++++++++++++++++++++++++++++++++++
 unmarshal_test.go | 177 +++++++++++++++++++++++++++++++++
 3 files changed, 498 insertions(+)
 create mode 100644 struct.go
 create mode 100644 unmarshal.go
 create mode 100644 unmarshal_test.go

diff --git a/struct.go b/struct.go
new file mode 100644
index 000000000000..e33170936df6
--- /dev/null
+++ b/struct.go
@@ -0,0 +1,74 @@
package scfg

import (
	"fmt"
	"reflect"
	"strings"
	"sync"
)

// structInfo contains scfg metadata for structs.
//
// It is built by getStructInfo from the exported fields of a struct type and
// their "scfg" tags.
type structInfo struct {
	param    int            // index of field storing parameters, -1 if none
	children map[string]int // indices of fields storing child directives, keyed by directive name
}

// structCache memoizes the structInfo computed by getStructInfo for each
// struct type. structCacheMutex is held by getStructInfo for the whole
// lookup-or-compute sequence.
var (
	structCacheMutex sync.Mutex
	structCache      = make(map[reflect.Type]*structInfo)
)

// getStructInfo returns the scfg metadata for the struct type t, computing it
// on first use and caching the result for subsequent calls.
//
// Unexported fields and fields whose tag name is "-" are skipped. A field
// with an empty tag name is registered under its Go field name. At most one
// field may carry the "param" option, and that field's tag name must be
// empty.
//
// An error is returned if a tag contains an unknown option, if the "param"
// option is misused, or if two fields resolve to the same directive name.
// Failures are not cached. t must be a struct type.
func getStructInfo(t reflect.Type) (*structInfo, error) {
	structCacheMutex.Lock()
	defer structCacheMutex.Unlock()

	if info := structCache[t]; info != nil {
		return info, nil
	}

	info := &structInfo{
		param:    -1,
		children: make(map[string]int),
	}

	for i := 0; i < t.NumField(); i++ {
		f := t.Field(i)
		if !f.IsExported() {
			continue
		}

		parts := strings.Split(f.Tag.Get("scfg"), ",")
		name, options := parts[0], parts[1:]
		if name == "-" {
			continue
		}

		isParam := false
		for _, opt := range options {
			switch opt {
			case "param":
				isParam = true
			default:
				return nil, fmt.Errorf("scfg: invalid option %q in struct tag", opt)
			}
		}

		if isParam {
			if info.param >= 0 {
				return nil, fmt.Errorf("scfg: param option specified multiple times in struct tag in %v", t)
			}
			if name != "" {
				return nil, fmt.Errorf("scfg: name must be empty when param option is specified in struct tag in %v", t)
			}
			info.param = i
			continue
		}

		if name == "" {
			name = f.Name
		}
		// Two fields claiming the same directive would silently shadow one
		// another; report the conflict instead of keeping the last one.
		if _, dup := info.children[name]; dup {
			return nil, fmt.Errorf("scfg: directive name %q used by multiple fields in %v", name, t)
		}
		info.children[name] = i
	}

	structCache[t] = info
	return info, nil
}
diff --git a/unmarshal.go b/unmarshal.go
new file mode 100644
index 000000000000..d5ece2065c29
--- /dev/null
+++ b/unmarshal.go
@@ -0,0 +1,247 @@
package scfg

import (
	"fmt"
	"io"
	"reflect"
	"strconv"
)

// Decoder reads and decodes an scfg document from an input stream.
type Decoder struct {
	r io.Reader // input stream the document is read from
}

// NewDecoder creates a Decoder that takes its input from r.
func NewDecoder(r io.Reader) *Decoder {
	return &Decoder{r: r}
}

// Decode parses an scfg document from the decoder's input and stores the
// result in the value v points to.
//
// An error is returned when v is nil or is not a pointer.
//
// A block may be decoded into:
//
//   - A map with string keys: every directive becomes one map entry, and a
//     directive name may not appear twice.
//   - A struct: every directive is stored in the matching struct field, and
//     a directive name may not appear twice.
//
// A directive may be decoded into:
//
//   - A map: the directive's children block fills the map; parameters are
//     rejected.
//   - A struct: the directive's children block fills the struct; parameters
//     are accepted only when a field's tag carries the "param" option.
//   - A slice: the parameters fill the slice; a children block is rejected.
//   - A string, boolean, integer or floating-point value: exactly one
//     parameter is required and is parsed into the value; a children block
//     is rejected.
//
// Struct fields are configured through the "scfg" key of their tag. The tag
// value is the directive name, optionally followed by a comma-separated list
// of options. Leaving the name empty keeps the default name (the Go field
// name) while still allowing options. A name of "-" excludes the field from
// decoding. The "param" option marks the field that receives the directive's
// parameters; its name must be left empty.
func (dec *Decoder) Decode(v interface{}) error {
	block, err := Read(dec.r)
	if err != nil {
		return err
	}

	// Only a non-nil pointer gives the decoder something it can write to.
	target := reflect.ValueOf(v)
	if target.Kind() == reflect.Ptr && !target.IsNil() {
		return unmarshalBlock(block, target)
	}
	return fmt.Errorf("scfg: invalid value for unmarshaling")
}

// unmarshalBlock decodes the directives of block into v.
//
// v is first unwrapped through any pointers (nil ones are allocated) and must
// then be a map whose key kind is string, or a struct:
//
//   - Map: a nil map is allocated and a non-empty one is emptied before
//     decoding. Each directive is decoded into a new element value and stored
//     under the directive name, converted to the map's key type.
//   - Struct: each directive is decoded into the field registered for its
//     name by getStructInfo. Unknown directive names are an error.
//
// In both cases a directive name appearing more than once is an error. Any
// other kind of v is rejected with an error.
func unmarshalBlock(block Block, v reflect.Value) error {
	v = unwrapPointers(v)
	t := v.Type()

	switch v.Kind() {
	case reflect.Map:
		keyType := t.Key()
		if keyType.Kind() != reflect.String {
			return fmt.Errorf("scfg: map key type must be string")
		}
		if v.IsNil() {
			v.Set(reflect.MakeMap(t))
		} else if v.Len() > 0 {
			clearMap(v)
		}

		seen := make(map[string]struct{})
		for _, dir := range block {
			if _, dup := seen[dir.Name]; dup {
				return fmt.Errorf("scfg: directive %q specified multiple times", dir.Name)
			}

			mv := reflect.New(t.Elem()).Elem()
			if err := unmarshalDirective(dir, mv); err != nil {
				return err
			}

			// The key kind is string, but the key type may be a named string
			// type. SetMapIndex requires an assignable key, so convert to
			// avoid a panic for such maps.
			key := reflect.ValueOf(dir.Name).Convert(keyType)
			v.SetMapIndex(key, mv)
			seen[dir.Name] = struct{}{}
		}
	case reflect.Struct:
		si, err := getStructInfo(t)
		if err != nil {
			return err
		}

		seen := make(map[string]struct{})
		for _, dir := range block {
			if _, dup := seen[dir.Name]; dup {
				return fmt.Errorf("scfg: directive %q specified multiple times", dir.Name)
			}

			fieldIndex, ok := si.children[dir.Name]
			if !ok {
				return fmt.Errorf("scfg: unknown directive %q", dir.Name)
			}
			fv := v.Field(fieldIndex)
			if err := unmarshalDirective(dir, fv); err != nil {
				return err
			}

			seen[dir.Name] = struct{}{}
		}
	default:
		return fmt.Errorf("scfg: unsupported type for unmarshaling blocks: %v", t)
	}

	return nil
}

// unmarshalDirective decodes a single directive into v after unwrapping any
// pointers.
//
//   - Map targets take no parameters; the children block fills the map.
//   - Struct targets receive the children block; parameters go to the field
//     tagged with the "param" option, and are rejected if there is no such
//     field.
//   - Every other target takes no children; the parameters are decoded into
//     the value itself.
func unmarshalDirective(dir *Directive, v reflect.Value) error {
	v = unwrapPointers(v)
	t := v.Type()

	kind := v.Kind()
	if kind != reflect.Map && kind != reflect.Struct {
		// Slices and scalars are filled from parameters only.
		if len(dir.Children) != 0 {
			return fmt.Errorf("scfg: directive %q requires zero children", dir.Name)
		}
		return unmarshalDirectiveParams(dir, v)
	}

	if kind == reflect.Struct {
		si, err := getStructInfo(t)
		if err != nil {
			return err
		}

		switch {
		case si.param >= 0:
			if err := unmarshalDirectiveParams(dir, v.Field(si.param)); err != nil {
				return err
			}
		case len(dir.Params) > 0:
			return fmt.Errorf("scfg: directive %q requires zero parameters", dir.Name)
		}
	} else if len(dir.Params) > 0 {
		return fmt.Errorf("scfg: directive %q requires zero parameters", dir.Name)
	}

	// Maps and structs both take their contents from the children block.
	return unmarshalBlock(dir.Children, v)
}

// unmarshalDirectiveParams decodes the parameters of dir into v.
//
// Pointers are unwrapped before the kind is inspected, so a pointer-to-slice
// target (for instance a "param" struct field of type *[]string) is handled
// like a slice instead of being mistaken for a scalar.
//
// A slice target receives one element per parameter; it is only assigned
// once every parameter has been decoded. Any other target requires exactly
// one parameter, which is decoded by unmarshalParam.
func unmarshalDirectiveParams(dir *Directive, v reflect.Value) error {
	v = unwrapPointers(v)

	if v.Kind() != reflect.Slice {
		if len(dir.Params) != 1 {
			return fmt.Errorf("scfg: directive %q requires exactly one parameter", dir.Name)
		}
		return unmarshalParam(dir.Params[0], v)
	}

	sv := reflect.MakeSlice(v.Type(), len(dir.Params), len(dir.Params))
	for i, param := range dir.Params {
		// Slice elements are settable, so decode straight into them.
		if err := unmarshalParam(param, sv.Index(i)); err != nil {
			return err
		}
	}
	v.Set(sv)
	return nil
}

// unmarshalParam parses a single parameter into v after unwrapping any
// pointers.
//
// Supported kinds are string, bool ("true" or "false" only), signed and
// unsigned integers (base 10, range-checked against the target's bit size)
// and floats. Named types with one of these underlying kinds are supported
// too. Any other kind yields an error.
//
// Parse failures from strconv are wrapped, so callers can inspect them with
// errors.Is / errors.As.
func unmarshalParam(param string, v reflect.Value) error {
	v = unwrapPointers(v)
	t := v.Type()

	// The kind-specific setters are used rather than Set(reflect.ValueOf(x)):
	// Set panics when v has a named type such as `type Level string`, because
	// the plain value is not assignable to it.
	switch v.Kind() {
	case reflect.String:
		v.SetString(param)
	case reflect.Bool:
		switch param {
		case "true":
			v.SetBool(true)
		case "false":
			v.SetBool(false)
		default:
			return fmt.Errorf("scfg: invalid bool parameter %q", param)
		}
	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
		i, err := strconv.ParseInt(param, 10, t.Bits())
		if err != nil {
			return fmt.Errorf("scfg: invalid %v parameter: %w", t, err)
		}
		v.SetInt(i)
	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
		u, err := strconv.ParseUint(param, 10, t.Bits())
		if err != nil {
			return fmt.Errorf("scfg: invalid %v parameter: %w", t, err)
		}
		v.SetUint(u)
	case reflect.Float32, reflect.Float64:
		f, err := strconv.ParseFloat(param, t.Bits())
		if err != nil {
			return fmt.Errorf("scfg: invalid %v parameter: %w", t, err)
		}
		v.SetFloat(f)
	default:
		return fmt.Errorf("scfg: unsupported type for unmarshaling parameters: %v", t)
	}

	return nil
}

// unwrapPointers follows v through every level of pointer indirection and
// returns the first non-pointer value. Nil pointers met on the way are
// replaced with a newly allocated zero value of their element type, which
// requires them to be settable.
func unwrapPointers(v reflect.Value) reflect.Value {
	for {
		if v.Kind() != reflect.Ptr {
			return v
		}
		if v.IsNil() {
			fresh := reflect.New(v.Type().Elem())
			v.Set(fresh)
		}
		v = v.Elem()
	}
}

// clearMap removes every entry from the map held by v, leaving the map
// itself in place.
func clearMap(v reflect.Value) {
	// Passing the zero reflect.Value to SetMapIndex deletes the key.
	var none reflect.Value
	keys := v.MapKeys()
	for i := range keys {
		v.SetMapIndex(keys[i], none)
	}
}
diff --git a/unmarshal_test.go b/unmarshal_test.go
new file mode 100644
index 000000000000..d64ec6e396fe
--- /dev/null
+++ b/unmarshal_test.go
@@ -0,0 +1,177 @@
package scfg_test

import (
	"fmt"
	"log"
	"reflect"
	"strings"
	"testing"

	"git.sr.ht/~emersion/go-scfg"
)

// ExampleDecoder decodes a small document into a struct: "foo" becomes an
// integer field, while "bar" becomes a nested struct that receives both the
// directive's parameter and its "baz" child.
func ExampleDecoder() {
	const raw = `foo 42
bar asdf {
	baz hello
}
`

	var data struct {
		Foo int `scfg:"foo"`
		Bar struct {
			Param string `scfg:",param"`
			Baz   string `scfg:"baz"`
		} `scfg:"bar"`
	}

	dec := scfg.NewDecoder(strings.NewReader(raw))
	if err := dec.Decode(&data); err != nil {
		log.Fatal(err)
	}

	fmt.Printf("Foo = %v\n", data.Foo)
	fmt.Printf("Bar.Param = %v\n", data.Bar.Param)
	fmt.Printf("Bar.Baz = %v\n", data.Bar.Baz)

	// Output:
	// Foo = 42
	// Bar.Param = asdf
	// Bar.Baz = hello
}

// nestedStruct is the target type of the "nestedStruct" test case: the "foo"
// directive is decoded into a field that is itself a struct.
type nestedStruct struct {
	Foo nestedStructInner `scfg:"foo"`
}

// nestedStructInner is the type of nestedStruct's Foo field; its "bar" child
// directive is decoded into Bar.
type nestedStructInner struct {
	Bar string `scfg:"bar"`
}

// structParams is the target type of the "structParams" test case. Params
// carries the "param" option and so receives the directive's parameters,
// while Bar is matched by its field name as a child directive.
type structParams struct {
	Params []string `scfg:",param"`
	Bar    string
}

// barStr is the string pointed to by the expected value of the "pointers"
// test case.
var barStr = "bar"

// unmarshalTests is the table of cases run by TestUnmarshal. Each case
// decodes raw into a new value of want's type and expects the result to be
// deeply equal to want.
var unmarshalTests = []struct {
	name string      // subtest name
	raw  string      // scfg document to decode
	want interface{} // expected decoded value; its type selects the decode target
}{
	// Top-level block decoded into a map.
	{
		name: "stringMap",
		raw: `hello world
foo bar`,
		want: map[string]string{
			"hello": "world",
			"foo":   "bar",
		},
	},
	// Scalar kinds, with directives matched by Go field name.
	{
		name: "simpleStruct",
		raw: `MyString asdf
MyBool true
MyInt -42
MyUint 42
MyFloat 3.14`,
		want: struct {
			MyString string
			MyBool   bool
			MyInt    int
			MyUint   uint
			MyFloat  float32
		}{
			MyString: "asdf",
			MyBool:   true,
			MyInt:    -42,
			MyUint:   42,
			MyFloat:  3.14,
		},
	},
	// Directive name taken from the struct tag.
	{
		name: "simpleStructTag",
		raw:  `foo bar`,
		want: struct {
			Foo string `scfg:"foo"`
		}{
			Foo: "bar",
		},
	},
	// Several parameters collected into a slice field.
	{
		name: "sliceParams",
		raw:  `Foo a s d f`,
		want: struct {
			Foo []string
		}{
			Foo: []string{"a", "s", "d", "f"},
		},
	},
	// Pointer field decoded through the pointer.
	{
		name: "pointers",
		raw:  `Foo bar`,
		want: struct {
			Foo *string
		}{
			Foo: &barStr,
		},
	},
	// Children block decoded into a map field.
	{
		name: "nestedMap",
		raw: `foo {
	bar baz
}`,
		want: struct {
			Foo map[string]string `scfg:"foo"`
		}{
			Foo: map[string]string{"bar": "baz"},
		},
	},
	// Children block decoded into a struct field.
	{
		name: "nestedStruct",
		raw: `foo {
	bar baz
}`,
		want: nestedStruct{
			Foo: nestedStructInner{
				Bar: "baz",
			},
		},
	},
	// Directive with both parameters and children decoded into a struct
	// that has a "param" field.
	{
		name: "structParams",
		raw: `Foo param1 param2 {
	Bar baz
}`,
		want: struct {
			Foo structParams
		}{
			Foo: structParams{
				Params: []string{"param1", "param2"},
				Bar:    "baz",
			},
		},
	},
}

// TestUnmarshal runs every entry of unmarshalTests as its own subtest.
func TestUnmarshal(t *testing.T) {
	for i := range unmarshalTests {
		// Copy the entry so the closure does not share the loop variable.
		tc := unmarshalTests[i]
		t.Run(tc.name, func(t *testing.T) {
			testUnmarshal(t, tc.raw, tc.want)
		})
	}
}

// testUnmarshal decodes raw into a newly allocated value of want's type and
// fails the test if decoding errors or the result is not deeply equal to
// want.
func testUnmarshal(t *testing.T, raw string, want interface{}) {
	ptr := reflect.New(reflect.TypeOf(want))

	err := scfg.NewDecoder(strings.NewReader(raw)).Decode(ptr.Interface())
	if err != nil {
		t.Fatalf("Decode() = %v", err)
	}

	if got := ptr.Elem().Interface(); !reflect.DeepEqual(got, want) {
		t.Errorf("Decode() = \n%#v\n but want \n%#v", got, want)
	}
}
-- 
2.41.0
There is a use-case I'd like to implement on top of this work, but
it proves difficult. In soju the "listen" directive can be repeated
like so:

    listen addr1
    listen addr2

I'd like to collect these multiple "listen" values into a single
struct field:

    struct {
        Listen []string `scfg:"listen"`
    }

I've implemented a patch to do this automatically; however, this
breaks use-cases where directives cannot be repeated:

    db sqlite3 ~/soju.db
    db postgresql localhost

When trying to unmarshal to the following struct:

    struct {
        DB []string `scfg:"db"`
    }

The result would be a slice with 4 entries, but what we really want
is an error (or at most 2 entries).

Possible solutions:

- Add an "accumulate"/"multiple" struct tag option to opt in.
- Decide that slice struct fields must not be used for directives
  which cannot be repeated. Add an Unmarshaler interface that users
  can implement to forbid repetitions.