~emersion/public-inbox

go-scfg: Add Unmarshal, Decode, NewDecoder v1 PROPOSED

delthas: 1
 Add Unmarshal, Decode, NewDecoder

 3 files changed, 512 insertions(+), 0 deletions(-)
Export patchset (mbox)
How do I use this?

Copy & paste the following snippet into your terminal to import this patchset into git:

curl -s https://lists.sr.ht/~emersion/public-inbox/patches/19979/mbox | git am -3
Learn more about email & git
View this thread in the archives

[PATCH go-scfg] Add Unmarshal, Decode, NewDecoder Export this patch

This adds unmarshaling support to go-scfg, inspired by the Go json
library.
---
 struct.go         |  98 ++++++++++++++++++++
 unmarshal.go      | 231 ++++++++++++++++++++++++++++++++++++++++++++++
 unmarshal_test.go | 183 ++++++++++++++++++++++++++++++++++++
 3 files changed, 512 insertions(+)
 create mode 100644 struct.go
 create mode 100644 unmarshal.go
 create mode 100644 unmarshal_test.go

diff --git a/struct.go b/struct.go
new file mode 100644
index 0000000..d5ba7e8
--- /dev/null
+++ b/struct.go
@@ -0,0 +1,98 @@
package scfg

import (
	"fmt"
	"reflect"
	"strings"
	"sync"
)

// inspired by
// https://github.com/go-mgo/mgo/blob/v2/bson/bson.go

// structInfo holds the decoding metadata computed once per struct type by
// getStructInfo. Field order matters: the type may be built positionally.
type structInfo struct {
	// FieldsMap maps a directive name to the index of the struct field
	// that receives it.
	FieldsMap map[string]int
	// InlineMap is the index of the field tagged ",inline", or -1 when the
	// struct has no such field.
	InlineMap int
	// Zero is a zero value of the struct type, used to reset a destination
	// before decoding into it.
	Zero reflect.Value
}

var (
	// structMap caches the structInfo computed for each struct type so the
	// reflection walk in getStructInfo runs only once per type.
	structMap = make(map[reflect.Type]*structInfo)
	// structMapMutex guards structMap: read-locked for lookups and
	// write-locked for insertions.
	structMapMutex sync.RWMutex
)

// getStructInfo returns the decoding metadata for the struct type st,
// computing it on first use and caching it in structMap.
//
// Each exported field is registered under the name given by its "scfg" tag
// (or by the whole tag when it contains no colon), falling back to the
// lowercased field name. A tag of "-" excludes the field. A field tagged
// ",inline" must be a map with string keys and is recorded as the struct's
// inline map; at most one is allowed.
//
// It returns an error for an unknown tag flag, an invalid or repeated
// ",inline" field, or two fields that resolve to the same name. st must be a
// struct type.
func getStructInfo(st reflect.Type) (*structInfo, error) {
	structMapMutex.RLock()
	sinfo, found := structMap[st]
	structMapMutex.RUnlock()
	if found {
		return sinfo, nil
	}

	n := st.NumField()
	fieldsMap := make(map[string]int, n)
	inlineMap := -1
	for i := 0; i < n; i++ {
		field := st.Field(i)
		if field.PkgPath != "" {
			// Unexported field, embedded or not: reflection cannot set it,
			// so registering it would make decoding panic later on.
			continue
		}

		tag := field.Tag.Get("scfg")
		if tag == "" && !strings.Contains(string(field.Tag), ":") {
			// A bare tag without any key:"value" pair is used as-is.
			tag = string(field.Tag)
		}
		if tag == "-" {
			continue
		}

		// The tag is "name[,flag...]"; keep the full tag for error messages.
		parts := strings.Split(tag, ",")
		name := parts[0]
		inline := false
		for _, flag := range parts[1:] {
			switch flag {
			case "inline":
				inline = true
			default:
				return nil, fmt.Errorf("unsupported flag %q in tag %q of type %s", flag, tag, st)
			}
		}

		if inline {
			if field.Type.Kind() != reflect.Map {
				return nil, fmt.Errorf("option ,inline in struct %v needs a map field", st.String())
			}
			if inlineMap >= 0 {
				return nil, fmt.Errorf("multiple ,inline maps in struct %v", st.String())
			}
			if field.Type.Key().Kind() != reflect.String {
				return nil, fmt.Errorf("option ,inline needs a map with string keys in struct %v", st.String())
			}
			inlineMap = i
			continue
		}

		key := name
		if key == "" {
			key = strings.ToLower(field.Name)
		}
		if _, dup := fieldsMap[key]; dup {
			return nil, fmt.Errorf("duplicated key %q in struct %v", key, st.String())
		}
		fieldsMap[key] = i
	}

	sinfo = &structInfo{
		FieldsMap: fieldsMap,
		InlineMap: inlineMap,
		Zero:      reflect.New(st).Elem(),
	}
	// Two goroutines may race to compute the same entry; both results are
	// equivalent, so the last write simply wins.
	structMapMutex.Lock()
	structMap[st] = sinfo
	structMapMutex.Unlock()
	return sinfo, nil
}
diff --git a/unmarshal.go b/unmarshal.go
new file mode 100644
index 0000000..674511e
--- /dev/null
+++ b/unmarshal.go
@@ -0,0 +1,231 @@
package scfg

import (
	"bytes"
	"fmt"
	"io"
	"reflect"
	"strconv"
	"strings"
)

// A Decoder reads and decodes scfg values from an input stream.
//
// Use NewDecoder to create one.
type Decoder struct {
	// r is the input that Decode reads the scfg document from.
	r io.Reader
}

// NewDecoder creates a Decoder that takes its input from r.
//
// Decoding is not incremental: Decode first parses its input with Read and
// only then populates the destination, so this is a reader-based convenience
// interface rather than a true streaming decoder.
func NewDecoder(r io.Reader) *Decoder {
	dec := new(Decoder)
	dec.r = r
	return dec
}

// Decode reads an scfg-encoded block from its input and stores it in the
// value pointed to by v.
//
// v must be a non-nil pointer; otherwise Decode returns an error. An error
// from parsing the input is wrapped, so it stays reachable through errors.Is
// and errors.As.
//
// See the documentation for Unmarshal for details about the conversion of
// scfg data into a Go value.
func (d *Decoder) Decode(v interface{}) error {
	r, err := Read(d.r)
	if err != nil {
		return fmt.Errorf("unmarshal: %w", err)
	}

	rv := reflect.ValueOf(v)
	if rv.Kind() != reflect.Ptr || rv.IsNil() {
		return fmt.Errorf("unmarshal: invalid parameter")
	}
	return readBlockInto(r, rv)
}

// Unmarshal parses the scfg-encoded block in data and stores the result in
// the value pointed to by v. It returns an error if v is nil or not a
// pointer.
//
// A block can be unmarshaled into:
//   - a map: the key type must be string, and every directive of the block
//     becomes one map entry keyed by the directive name;
//   - a struct: every directive is stored in the field whose name matches
//     it, or else in the struct's inline map when it has one.
//
// Struct fields are matched by their "scfg" tag, or by their lowercased name
// when there is no tag. Fields whose tag is a literal hyphen are ignored, as
// are unexported fields. A map field tagged ",inline" is the inline map.
//
// A directive can be unmarshaled into:
//   - a string: the first parameter is copied to it;
//   - any signed or unsigned integer or float type: the first parameter is
//     converted to that numeric type;
//   - a bool: the first parameter is lowercased and then converted, where
//     "0", "no" and "false" mean false and "1", "yes" and "true" mean true;
//   - a slice: every parameter is converted to the element type following
//     the rules above;
//   - a map or a struct: the directive's child block is unmarshaled into it
//     following the block rules above.
func Unmarshal(data []byte, v interface{}) error {
	dec := NewDecoder(bytes.NewReader(data))
	return dec.Decode(v)
}

// clearMap removes every entry from the map held by m, keeping the map
// itself allocated. m must have Kind reflect.Map.
func clearMap(m reflect.Value) {
	for _, key := range m.MapKeys() {
		// Storing the zero reflect.Value under a key deletes that key.
		m.SetMapIndex(key, reflect.Value{})
	}
}

// readBlockInto decodes the directives of block r into v.
//
// v is followed through any pointers, allocating nil ones on the way, and
// must end up being a map or a struct:
//   - a map must have a key type of kind string; it is allocated if nil and
//     emptied otherwise, then receives one entry per directive, keyed by the
//     directive name (a later directive with the same name wins);
//   - a struct is first reset to its zero value; each directive is then
//     stored in the field registered under its name, or in the struct's
//     inline map when it has one. Directives matching neither are ignored.
//
// It returns an error for any other destination kind, for a map whose key
// kind is not string, and for any error reported while describing the struct
// or decoding a directive. The destination may be partially filled when an
// error is returned.
func readBlockInto(r Block, v reflect.Value) error {
	t := v.Type()
	k := t.Kind()

	for k == reflect.Ptr {
		if v.IsNil() {
			v.Set(reflect.New(t.Elem()))
		}
		v = v.Elem()
		t = v.Type()
		k = t.Kind()
	}

	switch k {
	case reflect.Map:
		keyType := t.Key()
		if keyType.Kind() != reflect.String {
			return fmt.Errorf("decode: map key type must be string")
		}
		if v.IsNil() {
			v.Set(reflect.MakeMap(t))
		} else if v.Len() > 0 {
			clearMap(v)
		}
		elemType := t.Elem()
		for _, d := range r {
			// reflect.New already yields a zero value to decode into.
			mv := reflect.New(elemType).Elem()
			if err := readDirectiveInto(d, mv); err != nil {
				return err
			}
			// Convert so that named string key types are accepted too;
			// SetMapIndex panics on a plain string key otherwise.
			v.SetMapIndex(reflect.ValueOf(d.Name).Convert(keyType), mv)
		}
	case reflect.Struct:
		sinfo, err := getStructInfo(t)
		if err != nil {
			return err
		}
		v.Set(sinfo.Zero)

		// After the reset above the inline map field is nil; it is
		// allocated lazily when the first unmatched directive shows up.
		var inlineMap reflect.Value
		if sinfo.InlineMap != -1 {
			inlineMap = v.Field(sinfo.InlineMap)
		}
		for _, d := range r {
			if num, ok := sinfo.FieldsMap[d.Name]; ok {
				if err := readDirectiveInto(d, v.Field(num)); err != nil {
					return err
				}
				continue
			}
			if !inlineMap.IsValid() {
				// No field and no inline map: the directive is dropped.
				continue
			}
			mapType := inlineMap.Type()
			if inlineMap.IsNil() {
				inlineMap.Set(reflect.MakeMap(mapType))
			}

			mv := reflect.New(mapType.Elem()).Elem()
			if err := readDirectiveInto(d, mv); err != nil {
				return err
			}
			inlineMap.SetMapIndex(reflect.ValueOf(d.Name).Convert(mapType.Key()), mv)
		}
	default:
		return fmt.Errorf("decode: unsupported type for unmarshaling: %v", t.String())
	}
	return nil
}

// readDirectiveInto decodes the single directive d into v.
//
// v is followed through any pointers, allocating nil ones on the way. What
// is read from d then depends on the destination kind:
//   - map or struct: the child block is decoded with readBlockInto; a
//     directive without children leaves the destination untouched;
//   - slice: replaced by a new slice holding one converted element per
//     parameter;
//   - anything else: the first parameter is converted with readValueInto; a
//     directive without parameters leaves the destination untouched.
func readDirectiveInto(d *Directive, v reflect.Value) error {
	typ := v.Type()
	for typ.Kind() == reflect.Ptr {
		if v.IsNil() {
			v.Set(reflect.New(typ.Elem()))
		}
		v = v.Elem()
		typ = v.Type()
	}

	switch typ.Kind() {
	case reflect.Map, reflect.Struct:
		if d.Children == nil {
			return nil
		}
		return readBlockInto(d.Children, v)
	case reflect.Slice:
		count := len(d.Params)
		out := reflect.MakeSlice(typ, count, count)
		for i := 0; i < count; i++ {
			elem := reflect.New(typ.Elem()).Elem()
			if err := readValueInto(d.Params[i], elem); err != nil {
				return err
			}
			out.Index(i).Set(elem)
		}
		// Only publish the slice once every parameter converted cleanly.
		v.Set(out)
		return nil
	default:
		if len(d.Params) == 0 {
			return nil
		}
		return readValueInto(d.Params[0], v)
	}
}

// readValueInto converts the parameter s to the type of v and stores it.
//
// v must be settable. Supported kinds are string, bool, the signed and
// unsigned integers (including uintptr) and the floats; named types built on
// those kinds are supported as well. Booleans are matched case-insensitively
// against "0", "false", "no" and "1", "true", "yes". Numbers are parsed in
// base 10 at the width of the destination type, so a value that does not fit
// is rejected instead of being silently truncated.
//
// It returns an error when s cannot be converted or when the kind of v is
// not supported; v is left untouched in that case.
func readValueInto(s string, v reflect.Value) error {
	t := v.Type()
	switch t.Kind() {
	case reflect.String:
		// The Set* methods work on named types, unlike Set with a plain
		// string/bool value, which panics on assignability.
		v.SetString(s)
	case reflect.Bool:
		switch strings.ToLower(s) {
		case "0", "false", "no":
			v.SetBool(false)
		case "1", "true", "yes":
			v.SetBool(true)
		default:
			return fmt.Errorf("decode: invalid value, expected bool: %v", s)
		}
	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
		n, err := strconv.ParseInt(s, 10, t.Bits())
		if err != nil {
			return fmt.Errorf("decode: invalid value, expected int: %v", s)
		}
		v.SetInt(n)
	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
		n, err := strconv.ParseUint(s, 10, t.Bits())
		if err != nil {
			return fmt.Errorf("decode: invalid value, expected uint: %v", s)
		}
		v.SetUint(n)
	case reflect.Float32, reflect.Float64:
		n, err := strconv.ParseFloat(s, t.Bits())
		if err != nil {
			return fmt.Errorf("decode: invalid value, expected float: %v", s)
		}
		v.SetFloat(n)
	default:
		return fmt.Errorf("decode: unsupported type for unmarshaling value: %v", t.String())
	}
	return nil
}
diff --git a/unmarshal_test.go b/unmarshal_test.go
new file mode 100644
index 0000000..2d36e26
--- /dev/null
+++ b/unmarshal_test.go
@@ -0,0 +1,183 @@
package scfg

import (
	"bytes"
	"github.com/davecgh/go-spew/spew"
	"reflect"
	"testing"
)

// test unmarshals the scfg document s into zero and reports a test failure
// unless the result is deeply equal to expected.
//
// zero must be a pointer Unmarshal can decode into, and expected a value of
// the same type. An Unmarshal error aborts the calling test immediately.
func test(t *testing.T, zero interface{}, expected interface{}, s string) {
	// Attribute failures to the calling test rather than to this helper.
	t.Helper()

	if err := Unmarshal([]byte(s), zero); err != nil {
		t.Fatalf("unmarshal error: %v", err)
	}
	if !reflect.DeepEqual(zero, expected) {
		t.Error(spew.Sprintf("Unmarshal() = \n %v \n but want \n %v", zero, expected))
	}
}

// TestSimple checks that two quoted string directives land in the fields
// named by their scfg tags.
func TestSimple(t *testing.T) {
	type s struct {
		Foo string `scfg:"foo"`
		Bar string `scfg:"bar"`
	}

	const input = `
foo "f123"
bar "b123"
`
	want := &s{Foo: "f123", Bar: "b123"}
	test(t, &s{}, want, input)
}

// TestInline checks that directives without a matching field are collected
// in the ",inline" map, keeping only their first parameter and dropping any
// child block when the map values are strings.
func TestInline(t *testing.T) {
	type s struct {
		Apple  string            `scfg:"apple"`
		Napple string            `scfg:"napple"`
		Inline map[string]string `scfg:",inline"`
	}

	const input = `
apple a123
napple "123"
jack 1
lumber 1 2 3
slumber hi {
	hi jack
	jack
}
`
	want := &s{
		Apple:  "a123",
		Napple: "123",
		Inline: map[string]string{
			"jack":    "1",
			"lumber":  "1",
			"slumber": "hi",
		},
	}
	test(t, &s{}, want, input)
}

// TestNested checks that a directive's child block is decoded into a struct
// field, that the parent's own parameters and extra child parameters are
// ignored, and that an unknown child directive is skipped.
func TestNested(t *testing.T) {
	type n struct {
		Bird1 string `scfg:"bird1"`
		Bird2 int    `scfg:"bird2"`
	}
	type s struct {
		Nest n      `scfg:"nest"`
		Worm string `scfg:"worm"`
	}

	const input = `
nest unused {
	bird1 oizo oiza oizi
	bird2 3
	bird3 bird1
}
worm true
`
	want := &s{
		Nest: n{Bird1: "oizo", Bird2: 3},
		Worm: "true",
	}
	test(t, &s{}, want, input)
}

// TestComplex decodes two levels of nested structs with int, bool, float64
// and uintptr fields, plus a string slice that takes every parameter of its
// directive.
func TestComplex(t *testing.T) {
	type dog struct {
		BarkCount int     `scfg:"bark_count"`
		BarkFast  bool    `scfg:"bark_fast"`
		BarkSpeed float64 `scfg:"bark_speed"`
	}
	type cat struct {
		Trampoline uintptr `scfg:"trampoline"`
	}
	type animal struct {
		Dog dog `scfg:"dog"`
		Cat cat `scfg:"cat"`
	}
	type s struct {
		Animal      animal   `scfg:"animal"`
		Battleships []string `scfg:"battleships"`
	}

	const input = `
animal hi mom {
	dog hi dad {
		bark_count "41"
		bark_fast "yes" , very - bark
		bark_speed 4.123 - 4.123 = 0
	}
	cat {
		a cat jumps on a
		trampoline 4444 times
	}
}
battleships yuudachi yuudachi "yuudachi" "yuudachi kai"
`
	want := &s{
		Animal: animal{
			Dog: dog{
				BarkCount: 41,
				BarkFast:  true,
				BarkSpeed: 4.123,
			},
			Cat: cat{
				Trampoline: 4444,
			},
		},
		Battleships: []string{"yuudachi", "yuudachi", "yuudachi", "yuudachi kai"},
	}
	test(t, &s{}, want, input)
}

// TestNoTag checks that untagged fields are matched by their lowercased
// name and that a directive matching no field is ignored.
func TestNoTag(t *testing.T) {
	type s struct {
		Barber int
		Shop   int
	}

	const input = `
barber 1 2 3
shop -1
shap 2
`
	want := &s{Barber: 1, Shop: -1}
	test(t, &s{}, want, input)
}

// TestSkip checks that a field tagged "-" is never filled and that the tag,
// not the field name, decides which directive a field receives.
func TestSkip(t *testing.T) {
	type s struct {
		Skip string `scfg:"-"`
		Swap string `scfg:"that"`
		That string `scfg:"swap"`
	}

	const input = `
skip this
swap swap
that that
`
	want := &s{
		Skip: "",
		Swap: "that",
		That: "swap",
	}
	test(t, &s{}, want, input)
}

// TestStream exercises the NewDecoder/Decode entry point directly, decoding
// from a reader into a struct value.
func TestStream(t *testing.T) {
	type s struct {
		Foo string `scfg:"foo"`
		Bar string `scfg:"bar"`
	}

	const input = `
foo "f123"
bar "b123"
`
	var got s
	dec := NewDecoder(bytes.NewReader([]byte(input)))
	if err := dec.Decode(&got); err != nil {
		t.Fatalf("decode error: %v", err)
	}

	want := s{Foo: "f123", Bar: "b123"}
	if !reflect.DeepEqual(got, want) {
		t.Error(spew.Sprintf("Decode() = \n %v \n but want \n %v", got, want))
	}
}
-- 
2.17.1
Overall that code looks good to me, nice work!

Nit: maybe we should only recognize "true" and "false" for bools. Being
too permissive about this sort of thing has bitten me in the past.

Idea for a future improvement: add an Unmarshaler interface to allow
custom unpacking, just like encoding/json and encoding/xml do.

I have one question: right now the unmarshaler is pretty permissive.
As seen in the tests, extra parameters are ignored. Since scfg is
designed to be used for configuration files, I wonder if it would make
more sense to be stricter by default? In other words, error out when
a directive has no matching field in the struct. (We could always
have a Decoder function to turn that off.)

One other question: it doesn't seem like there's a way to grab the
params if the directive has children? For instance here:

    parent param1 param2 {
        child
    }

`param1` and `param2` will be lost no matter what the struct looks like?