~jonsterling: 1 Create intermediate datatype codifying XML format 11 files changed, 720 insertions(+), 451 deletions(-)
Copy & paste the following snippet into your terminal to import this patchset into git:
curl -s https://lists.sr.ht/~jonsterling/forester-devel/patches/53363/mbox | git am -3Learn more about email & git
From: Jon Sterling <jon@jonmsterling.com> Closes: https://todo.sr.ht/~jonsterling/forester/64 --- lib/core/Forester_core.ml | 2 + lib/core/Xml_tree.ml | 117 ++++++++ lib/frontend/Forest.ml | 11 +- lib/render/Compile.ml | 396 +++++++++++++++++++++++++++ lib/render/Compile.mli | 7 + lib/render/Render_text.ml | 1 - lib/render/Render_xml.ml | 441 ------------------------------ lib/render/Serialise_xml_tree.ml | 169 ++++++++++++ lib/render/Serialise_xml_tree.mli | 7 + lib/render/Xml_forester.ml | 17 +- lib/render/Xml_forester.mli | 3 + 11 files changed, 720 insertions(+), 451 deletions(-) create mode 100644 lib/core/Xml_tree.ml create mode 100644 lib/render/Compile.ml create mode 100644 lib/render/Compile.mli delete mode 100644 lib/render/Render_xml.ml create mode 100644 lib/render/Serialise_xml_tree.ml create mode 100644 lib/render/Serialise_xml_tree.mli diff --git a/lib/core/Forester_core.ml b/lib/core/Forester_core.ml index f424d05..e68d0cc 100644 --- a/lib/core/Forester_core.ml +++ b/lib/core/Forester_core.ml @@ -16,3 +16,5 @@ module Prim = Prim module TeX_cs = TeX_cs module Symbol = Symbol + +module Xml_tree = Xml_tree \ No newline at end of file diff --git a/lib/core/Xml_tree.ml b/lib/core/Xml_tree.ml new file mode 100644 index 0000000..9afc002 --- /dev/null +++ b/lib/core/Xml_tree.ml @@ -0,0 +1,117 @@ +open Forester_prelude + +type xml_qname = { + prefix : string; + (** The prefix to a qualified XML name; this prefix is expected to be rendered in the scope of a corresponding [xmlns] binding. *) + + uname : string; + (** The unqualified part of the XML name. *) + + xmlns : string option + (** The XML namespace bound by the current scope to [prefix]. This is not used when serialising to XML, but can be helpful for other analyses. *) +} + +type xml_attr = {key : xml_qname; value : string} + +type 'content attribution = + | Author of 'content + | Contributor of 'content + +type date = + | Date of { + href : string option; + year : int; + month : int option; + day : int option + } + +type 'content meta = + | Meta of { + key : string; + body : 'content + } + +type link_type = [`Local | `External] + +type ('content, 'tree) content_node = + | Text of string + | CDATA of string + | Xml_tag of { + name : xml_qname; + attrs : xml_attr list; + content : 'content + } + | Prim of Prim.t * 'content + | Subtree of 'tree + | Ref of { + addr : string; + href : string; + taxon : string option; + number : string option + } + | Link of { + type_ : link_type; + href : string; + title : string option; + addr : string option; + content : 'content + } + | TeX of { + display : [`Inline | `Block]; + body : string + } + | Img of {src : string} + | Embedded_tex of { + preamble : string; + source : string + } + | Info of string + + +type 'content frontmatter = { + title : 'content option; + anchor : string; + number : string option; + taxon : string option; + designated_parent : string option; + metas : 'content meta list; + route : string; + addr : string; + source_path : string option; + dates : date list; + last_changed : date option; + attributions : 'content attribution list +} + +(* TODO: generalise *) +type 'tree backmatter_elt = + | Contributions of 'tree list + | Related of 'tree list + | Backlinks of 'tree list + | Context of 'tree list + | References of 'tree list + +type 'tree backmatter = 'tree backmatter_elt list + +type tree_options = { + toc : bool; + numbered : bool; + show_heading : bool; + show_metadata : bool; + expanded : bool; + root : bool +} + +type 'content tree = { + options : tree_options; + frontmatter : 'content frontmatter; + mainmatter : 'content; + backmatter : 'content tree backmatter option +} + +(* Tie the knot *) +type tree_ = Tree of content tree +and content = Content of (content, tree_) content_node list + +let splice (Content xs) = xs +let splice_tree (Tree tree) = tree \ No newline at end of file diff --git a/lib/frontend/Forest.ml b/lib/frontend/Forest.ml index da815ef..4b5fdb5 100644 --- a/lib/frontend/Forest.ml +++ b/lib/frontend/Forest.ml @@ -264,12 +264,8 @@ let render_tree ~cfg ~cwd (tree : Sem.tree) = Eio.Path.with_open_out ~create path @@ fun flow -> Eio.Buf_write.with_flow flow @@ fun writer -> let fmt = Eio_util.formatter_of_writer writer in - let node = Render_xml.render_tree_top tree in - Format.fprintf fmt {|<?xml version="1.0" encoding="UTF-8"?>|}; - Format.pp_print_newline fmt (); - Format.fprintf fmt "<?xml-stylesheet type=\"text/xsl\" href=\"%s\"?>" cfg.stylesheet; - Format.pp_print_newline fmt (); - Pure_html.pp fmt node + Serialise_xml_tree.pp ~stylesheet:cfg.stylesheet fmt @@ + Compile.compile_tree_top tree end let render_json ~cwd docs = @@ -320,7 +316,8 @@ let render_trees ~cfg ~forest ~render_only : unit = Eio_util.ensure_dir_path cwd ["output"; "resources"]; run_renderer ~cfg forest @@ fun () -> - Render_xml.with_mainmatter_cache @@ fun () -> + Compile.run @@ fun () -> + Serialise_xml_tree.run @@ fun () -> let trees = match render_only with | None -> forest.trees |> M.to_seq |> Seq.map snd |> List.of_seq diff --git a/lib/render/Compile.ml b/lib/render/Compile.ml new file mode 100644 index 0000000..6e89df0 --- /dev/null +++ b/lib/render/Compile.ml @@ -0,0 +1,396 @@ +open Forester_prelude +open Forester_core + +module X = Xml_tree +module F = Xml_forester +module E = Render_effect.Perform + +module String_map = Map.Make (String) +module Addr_map = Map.Make (Addr) + +module Ancestors = Algaeff.Reader.Make (struct type t = addr list end) +module Current_addr = Algaeff.Reader.Make (struct type t = addr end) +module Mainmatter_cache = Algaeff.State.Make (struct type t = X.content Addr_map.t end) + +module Xmlns_map = +struct + type t = + {prefix_to_xmlns : string String_map.t; + xmlns_to_prefixes : string list String_map.t} + + let empty = + {prefix_to_xmlns = String_map.empty; + xmlns_to_prefixes = String_map.empty} + + let assoc ~prefix ~xmlns env = + {prefix_to_xmlns = String_map.add prefix xmlns env.prefix_to_xmlns; + xmlns_to_prefixes = String_map.add_to_list xmlns prefix env.xmlns_to_prefixes} +end + +module Xmlns_prefixes = Algaeff.Reader.Make (Xmlns_map) + +let get_xmlns_for_prefix prefix = + let env = Xmlns_prefixes.read () in + String_map.find_opt prefix env.prefix_to_xmlns + +let rec normalise_prefix ?loc ~prefix ~xmlns kont = + match xmlns with + | Some xmlns -> + begin + let open Xmlns_map in + let env = Xmlns_prefixes.read () in + let exception Shadowing in + try + begin + match + String_map.find_opt prefix env.prefix_to_xmlns, + String_map.find_opt xmlns env.xmlns_to_prefixes + with + | None, (None | Some []) -> + let env = assoc ~prefix ~xmlns env in + Xmlns_prefixes.run ~env @@ fun () -> + kont @@ ([(prefix, xmlns)], prefix) + | Some xmlns', Some prefixes -> + if xmlns' = xmlns && List.mem prefix prefixes then + kont ([], prefix) + else + raise Shadowing + | _, Some (prefix' :: _) -> + kont ([], prefix') + | Some xmlns', None -> + raise Shadowing + end + with Shadowing -> + normalise_prefix ?loc ~prefix:(prefix ^ "_") ~xmlns:(Some xmlns) kont + end + | _ -> + kont ([], prefix) + + +let compile_date (date : Date.t) = + let date_addr = User_addr (Format.asprintf "%a" Date.pp date) in + let href = E.get_doc date_addr |> Option.map @@ fun _doc -> E.route date_addr in + let year = Date.year date in + let month = Date.month date in + let day = Date.day date in + X.Date {href; year; month; day} + +let compile_dates = List.map compile_date + +let rec compile_located (located : Sem.node Range.located) = + match located.value with + | Sem.Text txt -> + [X.Text txt] + | Sem.Verbatim txt -> + [X.CDATA txt] + | Sem.Prim (p, xs) -> + compile_prim p xs + | Sem.Math (mode, xs) -> + let body = + let module TP = Render_TeX_like.Printer in + Str.global_replace (Str.regexp "\n") " " @@ + TP.contents @@ Render_TeX_like.render ~cfg:{tex = false} xs + in + let display = + match mode with + | Inline -> `Inline + | Display -> `Block + in + [X.TeX {display; body}] + + | Sem.Link (addr, title, modifier) -> + begin + match E.get_doc addr with + | Some tree -> + compile_internal_link ~title ~modifier ~addr ~dest:tree + | None -> + let url = Format.asprintf "%a" pp_addr addr in + compile_external_link ~title ~modifier ~url + end + + | Sem.Ref addr -> + begin + match E.get_doc addr with + | None -> + Reporter.fatalf ?loc:located.loc Tree_not_found "could not find tree at address `%a` for reference" pp_addr addr + | Some tree -> + let href = E.route addr in + let addr = Format.asprintf "%a" pp_addr addr in + let taxon = tree.fm.taxon |> Option.map String_util.sentence_case in + let number = tree.fm.number in + [X.Ref {addr; taxon; href; number}] + end + + | Sem.Img path -> + [X.Img {src = path}] + + | Sem.If_tex (_, xs) -> + X.splice @@ compile_nodes xs + + | Sem.Xml_tag (name, attrs, xs) -> + let rec fold_attrs tag_prefix updates acc attrs = + match attrs with + | [] -> + let xmlns_attrs = + updates |> List.map @@ fun (prefix, xmlns) -> + X.{key = + X.{prefix = "xmlns"; + uname = prefix; + xmlns = None}; + value = xmlns} + in + let name = X.{prefix = tag_prefix; uname = name.uname; xmlns = get_xmlns_for_prefix tag_prefix} in + let attrs = xmlns_attrs @ List.rev acc in + let content = compile_nodes xs in + X.Xml_tag {name; attrs; content} + + | (k, v) :: attrs -> + normalise_prefix ?loc:located.loc ~prefix:k.prefix ~xmlns:k.xmlns @@ fun (updates', prefix) -> + let xml_attr = + X.{key = X.{prefix; uname = k.uname; xmlns = None}; + value = Render_text.Printer.contents @@ Render_text.render v} + in + fold_attrs tag_prefix (updates @ updates') (xml_attr :: acc) attrs + in + + [normalise_prefix ~prefix:name.prefix ~xmlns:name.xmlns @@ fun (updates, tag_prefix) -> + fold_attrs tag_prefix updates [] attrs] + + | Sem.TeX_cs name -> + Reporter.fatalf ?loc:located.loc Resolution_error + "unresolved TeX control sequence `\\%a`" TeX_cs.pp name + + | Sem.Object _ -> + Reporter.fatal ?loc:located.loc Type_error + "tried to compile object closure to XML" + + | Sem.Embed_tex {preamble; source} -> + let as_tex x = + Render_TeX_like.Printer.contents @@ + Render_TeX_like.render ~cfg:{tex = true} x + in + let preamble = as_tex preamble in + let source = as_tex source in + let hash = Digest.to_hex @@ Digest.string @@ preamble ^ source in + (* TODO: the following should be done during evaluation! *) + E.enqueue_latex ~name:hash ~preamble ~source; + [X.Embedded_tex {preamble; source}] + + | Sem.Transclude (opts, addr) -> + begin + match E.get_doc addr with + | None -> + Reporter.fatalf ?loc:located.loc Tree_not_found "could not find tree at address `%a` for transclusion" pp_addr addr + | Some doc -> + compile_transclusion ~opts doc + end + + | Sem.Subtree (opts, subtree) -> + compile_transclusion ~opts subtree + + | Sem.Query (opts, query) -> + let trees = E.run_query query in + begin + match trees with + | [] -> + [X.Prim (`P, X.Content [X.Info "Transclusion cycle"])] + | _ -> + trees |> List.concat_map @@ fun tree -> + let opts = Sem.{expanded = false; show_heading = true; title_override = None; taxon_override = None; toc = false; numbered = false; show_metadata = true} in + compile_transclusion ~opts tree + end + +and compile_transclusion ~opts (tree : Sem.tree) = + let current = Current_addr.read () in + let update old_ancestors = current :: old_ancestors in + Ancestors.scope update @@ fun () -> + [X.Subtree (compile_tree ~opts tree)] + +and compile_title ~(opts : Sem.transclusion_opts) (fm : Sem.frontmatter) = + let ancestors = Ancestors.read () in + let title = + match opts.title_override with + | Some title -> Some title + | None -> + fm.title |> Option.map @@ + Render_util.expand_title_with_parents ~ancestors fm + in + title |> Option.map @@ fun title -> + compile_nodes @@ Sem.sentence_case title + +and compile_attributions ~contributors ~authors = + match authors, contributors with + | [], [] -> [] + | _ -> + List.map compile_author authors @ List.map compile_contributor contributors + +and compile_author author = + X.Author (compile_attribution_inner author) + +and compile_contributor author = + X.Contributor (compile_attribution_inner author) + +and compile_attribution_inner author = + let exception Untitled in + try + match E.get_doc author with + | None -> raise Untitled + | Some biotree -> + let href = E.route biotree.fm.addr in + let content = + match biotree.fm.title with + | None -> raise Untitled + | Some title -> compile_nodes title + in + let title = biotree.fm.title |> Option.map Sem.string_of_nodes in + let addr = Option.some @@ Format.asprintf "%a" pp_addr author in + X.Content [X.Link {type_ = `Local; href; title; addr; content}] + with Untitled -> + let name = Format.asprintf "%a" pp_addr author in + X.Content [X.Text name] + + +and compile_meta (key, body) = + let body = compile_nodes body in + X.Meta {key; body} + +and compile_frontmatter ~opts (fm : Sem.frontmatter) = + let anchor = string_of_int @@ Oo.id (object end) in + let title = compile_title ~opts fm in + let number = fm.number in + let taxon = + Option.map String_util.sentence_case @@ + match opts.taxon_override with + | Some taxon -> Some taxon + | None -> fm.taxon + in + let route = E.route fm.addr in + let source_path = fm.source_path in + let addr = Format.asprintf "%a" pp_addr fm.addr in + let designated_parent = + fm.designated_parent |> Option.map @@ fun addr -> + Format.asprintf "%a" pp_addr addr + in + let dates = compile_dates fm.dates in + let attributions = compile_attributions ~contributors:(E.contributors fm.addr) ~authors:fm.authors in + let last_changed = E.last_changed fm.addr |> Option.map compile_date in + let metas = fm.metas |> List.map compile_meta in + X.{title; + anchor; + number; + taxon; + designated_parent; + metas; + route; + addr; + source_path; + dates; + last_changed; + attributions} + +and compile_tree ?(backmatter = false) ~opts (tree : Sem.tree) = + Current_addr.run ~env:tree.fm.addr @@ fun () -> + let ancestors = Ancestors.read () in + let options = + X.{toc = opts.toc; + numbered = opts.numbered; + show_heading = opts.show_heading; + show_metadata = opts.show_metadata; + expanded = opts.expanded; + root = E.is_root tree.fm.addr} + in + let frontmatter = compile_frontmatter ~opts tree.fm in + let mainmatter = + begin + match tree.fm.addr with + | addr when List.mem addr ancestors -> + X.Content [X.Prim (`P, X.Content [X.Info "Transclusion cycle"])] + | addr -> + let cache = Mainmatter_cache.get () in + match Addr_map.find_opt addr cache with + | Some cached -> cached + | None -> + let result = compile_nodes tree.body in + Mainmatter_cache.modify @@ Addr_map.add addr result; + result + end; + + in + let backmatter = + if backmatter then + Some (compile_backmatter tree.fm.addr) + else + None + in + X.Tree {options; frontmatter; mainmatter; backmatter} + +and compile_backmatter addr = + let opts = {Sem.default_transclusion_opts with numbered = false} in + let compile_trees = + List.map @@ fun tree -> + X.splice_tree @@ compile_tree ~opts tree + in + let contributions = compile_trees @@ E.contributions addr in + let context = compile_trees @@ E.parents addr in + let related = compile_trees @@ E.related addr in + let backlinks = compile_trees @@ E.backlinks addr in + let references = compile_trees @@ E.bibliography addr in + [X.Contributions contributions; + X.Context context; + X.Related related; + X.Backlinks backlinks; + X.References references] + +and compile_internal_link ~title ~modifier ~addr ~dest = + let href = E.route addr in + let ancestors = Ancestors.read () in + let dest_title = + dest.fm.title |> Option.map @@ + Render_util.expand_title_with_parents ~ancestors dest.fm + in + let content = + title + |> Option.fold ~none:dest_title ~some:Option.some + |> Option.map (Sem.apply_modifier modifier) + |> Option.value ~default:[Range.locate_opt None @@ Sem.Text "Untitled"] + |> compile_nodes + in + let title = + match dest_title with + | None -> None + | Some t -> + let title_string = + String_util.sentence_case @@ + Render_text.Printer.contents @@ + Render_text.render t + in + Some title_string + in + let addr = Some (Format.asprintf "%a" pp_addr addr) in + [X.Link {type_ = `Local; href; title; content; addr}] + +and compile_external_link ~title ~modifier ~url = + let href = url in + let content = + title + |> Option.map (Sem.apply_modifier modifier) + |> Option.value ~default:[Range.locate_opt None @@ Sem.Text url] + |> compile_nodes + in + [X.Link {type_ = `External; href; content; title = None; addr = None}]; + +and compile_nodes (xs : Sem.t) = X.Content (List.concat_map compile_located xs) + +and compile_prim p xs = + let content = compile_nodes xs in + [X.Prim (p, content)] + +let compile_tree_top tree = + Ancestors.run ~env:[] @@ fun () -> + let env = Xmlns_map.assoc ~prefix:F.reserved_prefix ~xmlns:F.forester_xmlns Xmlns_map.empty in + Xmlns_prefixes.run ~env @@ fun () -> + compile_tree ~backmatter:true ~opts:Sem.default_transclusion_opts tree + + +let run kont = + Mainmatter_cache.run ~init:Addr_map.empty kont diff --git a/lib/render/Compile.mli b/lib/render/Compile.mli new file mode 100644 index 0000000..ef8f7e5 --- /dev/null +++ b/lib/render/Compile.mli @@ -0,0 +1,7 @@ +open Forester_core + +val run : (unit -> 'a) -> 'a +(** Initialises a cache for tree mainmatters in the given scope. *) + +val compile_tree_top : Sem.tree -> Xml_tree.tree_ +(** Must be called in the scope of {!run}. *) diff --git a/lib/render/Render_text.ml b/lib/render/Render_text.ml index 2a1e868..8f8e988 100644 --- a/lib/render/Render_text.ml +++ b/lib/render/Render_text.ml @@ -18,7 +18,6 @@ struct Format.asprintf "%a" (fun fmt _ -> printer fmt) () end - let rec render_node : Sem.node Range.located -> Printer.t = fun node -> match node.value with diff --git a/lib/render/Render_xml.ml b/lib/render/Render_xml.ml deleted file mode 100644 index 92d1c31..0000000 --- a/lib/render/Render_xml.ml @@ -1,441 +0,0 @@ -open Forester_core -open Forester_prelude - -open Pure_html - -module E = Render_effect.Perform -module F = Xml_forester - -module String_map = Map.Make (String) -module Addr_map = Map.Make (Addr) - -module Ancestors = Algaeff.Reader.Make (struct type t = addr list end) -module Current_addr = Algaeff.Reader.Make (struct type t = addr end) -module Mainmatter_cache = Algaeff.State.Make (struct type t = node Addr_map.t end) - -module Xmlns_map = -struct - type t = - {prefix_to_xmlns : string String_map.t; - xmlns_to_prefixes : string list String_map.t} - - let empty = - {prefix_to_xmlns = String_map.empty; - xmlns_to_prefixes = String_map.empty} - - let assoc ~prefix ~xmlns env = - {prefix_to_xmlns = String_map.add prefix xmlns env.prefix_to_xmlns; - xmlns_to_prefixes = String_map.add_to_list xmlns prefix env.xmlns_to_prefixes} -end - -module Xmlns_prefixes = Algaeff.Reader.Make (Xmlns_map) - -let rec normalise_prefix ?loc ~prefix ~xmlns kont = - match xmlns with - | Some xmlns -> - begin - let open Xmlns_map in - let env = Xmlns_prefixes.read () in - let exception Shadowing in - try - begin - match - String_map.find_opt prefix env.prefix_to_xmlns, - String_map.find_opt xmlns env.xmlns_to_prefixes - with - | None, (None | Some []) -> - let env = assoc ~prefix ~xmlns env in - Xmlns_prefixes.run ~env @@ fun () -> - kont @@ ([(prefix, xmlns)], prefix) - | Some xmlns', Some prefixes -> - if xmlns' = xmlns && List.mem prefix prefixes then - kont ([], prefix) - else - raise Shadowing - | _, Some (prefix' :: _) -> - kont ([], prefix') - | Some xmlns', None -> - raise Shadowing - end - with Shadowing -> - normalise_prefix ?loc ~prefix:(prefix ^ "_") ~xmlns:(Some xmlns) kont - end - | _ -> - kont ([], prefix) - -let optional kont opt = - match opt with - | Some x -> kont x - | None -> F.null [] - -let optional_ kont opt = - match opt with - | Some x -> kont x - | None -> F.null_ - -let render_date (date : Date.t) = - let date_addr = User_addr (Format.asprintf "%a" Date.pp date) in - F.date [ - E.get_doc date_addr |> optional_ @@ fun _ -> - F.href "%s" @@ E.route date_addr - ] [ - F.year [] "%i" (Date.year date); - Date.month date |> optional @@ F.month [] "%i"; - Date.day date |> optional @@ F.day [] "%i" - ] - -let render_dates (dates : Date.t list) = - F.null @@ List.map render_date dates - - -let rec render_located (located : Sem.node Range.located) = - match located.value with - | Sem.Prim (p, x) -> - F.prim p [] [render_nodes x] - - | Sem.Text text -> - txt "%s" text - - | Sem.Verbatim cdata -> - txt ~raw:true "<![CDATA[%s]]>" cdata - - | Sem.Math (mode, body) -> - let rendered = - let module TP = Render_TeX_like.Printer in - Str.global_replace (Str.regexp "\n") " " @@ - TP.contents @@ Render_TeX_like.render ~cfg:{tex = false} body - in - F.tex [ - match mode with - | Inline -> F.null_ - | Display -> F.display "block" - ] "<![CDATA[%s]]>" rendered - - | Sem.Link (addr, title, modifier) -> - begin - match E.get_doc addr with - | Some tree -> - render_internal_link ~title ~modifier ~addr ~dest:tree - | None -> - let url = Format.asprintf "%a" pp_addr addr in - render_external_link ~title ~modifier ~url - end - - | Sem.Ref addr -> - begin - match E.get_doc addr with - | None -> - Reporter.fatalf ?loc:located.loc Tree_not_found "could not find tree at address `%a` for reference" pp_addr addr - | Some tree -> - let url = E.route addr in - F.ref [ - F.addr_ "%s" (Format.asprintf "%a" pp_addr addr); - F.href "%s" url; - tree.fm.taxon |> Option.map String_util.sentence_case |> optional_ @@ F.taxon_ "%s"; - tree.fm.number |> optional_ @@ F.number_ "%s" - ] - end - - | Sem.Img path -> - F.img [F.src "%s" path] - - | Sem.If_tex (_, x) -> - render_nodes x - - | Sem.Xml_tag (name, attrs, xs) -> - - let rec fold_attrs tag_prefix updates acc attrs = - match attrs with - | [] -> - let xmlns_attrs = - updates |> List.map @@ fun (prefix, xmlns) -> - string_attr ("xmlns:" ^ prefix) "%s" xmlns - in - let tag_name = - match tag_prefix with - | "" -> name.uname - | prefix -> prefix ^ ":" ^ name.uname - in - std_tag - tag_name - (xmlns_attrs @ List.rev acc) - [render_nodes xs] - - | (k, v) :: attrs -> - normalise_prefix ?loc:located.loc ~prefix:k.prefix ~xmlns:k.xmlns @@ fun (updates', prefix) -> - let xml_attr = - let name = - match prefix with - | "" -> k.uname - | _ -> prefix ^ ":" ^ k.uname - in - string_attr name "%s" @@ - Render_text.Printer.contents @@ - Render_text.render v - in - fold_attrs tag_prefix (updates @ updates') (xml_attr :: acc) attrs - in - - normalise_prefix ~prefix:name.prefix ~xmlns:name.xmlns @@ fun (updates, tag_prefix) -> - fold_attrs tag_prefix updates [] attrs - - | Sem.TeX_cs name -> - Reporter.fatalf ?loc:located.loc Resolution_error - "unresolved TeX control sequence `\\%a`" TeX_cs.pp name - - | Sem.Object _ -> - Reporter.fatal ?loc:located.loc Type_error - "tried to render object closure to XML" - - | Sem.Embed_tex {preamble; source} -> - let as_tex x = - Render_TeX_like.Printer.contents @@ - Render_TeX_like.render ~cfg:{tex = true} x - in - let preamble = as_tex preamble in - let source = as_tex source in - let hash = Digest.to_hex @@ Digest.string @@ preamble ^ source in - E.enqueue_latex ~name:hash ~preamble ~source; - F.embedded_tex [F.hash "%s" hash] [ - F.embedded_tex_preamble [] "<![CDATA[%s]]>" preamble; - F.embedded_tex_body [] "<![CDATA[%s]]>" source - ] - - | Sem.Transclude (opts, addr) -> - begin - match E.get_doc addr with - | None -> - Reporter.fatalf ?loc:located.loc Tree_not_found "could not find tree at address `%a` for transclusion" pp_addr addr - | Some doc -> - render_transclusion ~opts doc - end - - | Sem.Subtree (opts, subtree) -> - render_transclusion ~opts subtree - - | Sem.Query (opts, query) -> - let trees = E.run_query query in - match trees with - | [] -> - F.prim `P [] [ - F.info [] [txt "Query returned no results"] - ] - | _ -> - render_nodes begin - trees |> List.map @@ fun (tree : Sem.tree) -> - let addr = tree.fm.addr in - let opts = Sem.{expanded = false; show_heading = true; title_override = None; taxon_override = None; toc = false; numbered = false; show_metadata = true} in - Range.locate_opt None @@ Sem.Transclude (opts, addr) - end - -and render_nodes nodes = - F.null @@ List.map render_located nodes - - -and render_transclusion ~opts (tree : Sem.tree) = - let current = Current_addr.read () in - let update old_ancestors = current :: old_ancestors in - Ancestors.scope update @@ fun () -> - render_tree ~opts tree - -and render_internal_link ~title ~modifier ~addr ~dest = - let url = E.route addr in - let ancestors = Ancestors.read () in - let dest_title = - dest.fm.title |> Option.map @@ - Render_util.expand_title_with_parents ~ancestors dest.fm - in - let target_title_attr = - match dest_title with - | None -> F.null_ - | Some t -> - let title_string = - String_util.sentence_case @@ - Render_text.Printer.contents @@ - Render_text.render t - in - F.title_ "%s" title_string - in - let title = - title - |> Option.fold ~none:dest_title ~some:Option.some - |> Option.map (Sem.apply_modifier modifier) - |> Option.value ~default:[Range.locate_opt None @@ Sem.Text "Untitled"] - in - F.link [ - F.href "%s" url; - F.type_ "local"; - F.addr_ "%s" (Format.asprintf "%a" pp_addr addr); - ] [render_nodes title] - -and render_external_link ~title ~modifier ~url = - let title = - title - |> Option.map (Sem.apply_modifier modifier) - |> Option.value ~default:[Range.locate_opt None @@ Sem.Text url] - in - F.link [ - F.href "%s" url; - F.type_ "external" - ] [render_nodes title] - -and render_author_name author = - let exception Untitled in - try - match E.get_doc author with - | None -> raise Untitled - | Some biotree -> - let addr = biotree.fm.addr in - let url = E.route addr in - F.link [ - F.href "%s" url; - F.type_ "local"; - F.addr_ "%s" (Format.asprintf "%a" pp_addr addr) - ] [ - match biotree.fm.title with - | None -> raise Untitled - | Some title -> render_nodes title - ] - with Untitled -> - txt "%s" (Format.asprintf "%a" pp_addr author) - -and render_author author = - F.author [] [render_author_name author] - -and render_contributor contributor = - F.contributor [] [render_author_name contributor] - -and render_authors ~contributors ~authors = - match authors, contributors with - | [], [] -> F.null [] - | _, _ -> - F.authors [] [ - F.null @@ List.map render_author authors; - F.null @@ List.map render_contributor contributors - ] - -and render_meta (key, body) = - F.meta [F.name "%s" key] [ - render_nodes body - ] - -and render_last_changed (fm : Sem.frontmatter) = - let addr = fm.addr in - let date = E.last_changed addr in - date |> optional @@ fun date -> F.last_changed [] [render_date date] - -and render_frontmatter ~opts (fm : Sem.frontmatter) = - let anchor = string_of_int @@ Oo.id (object end) in - let contributors = E.contributors fm.addr in - let authors = fm.authors in - - F.frontmatter [] [ - F.anchor [] "%s" anchor; - - begin - let taxon = - match Sem.(opts.taxon_override) with - | Some taxon -> Some taxon - | None -> fm.taxon - in - match taxon with - | None -> F.null [] - | Some taxon -> F.taxon [] "%s" @@ String_util.sentence_case taxon - end; - - begin - let addr = fm.addr in - F.null [ - F.addr [] "%s" (Format.asprintf "%a" pp_addr addr); - F.route [] "%s" @@ E.route addr - ] - end; - - begin - fm.source_path |> optional @@ fun path -> - F.source_path [] "%s" path - end; - - render_title ~opts fm; - render_dates fm.dates; - render_authors ~contributors ~authors; - fm.number |> optional @@ F.number [] "%s"; - begin - fm.designated_parent |> optional @@ fun addr -> - F.parent [] "%s" (Format.asprintf "%a" pp_addr addr) - end; - F.null @@ List.map render_meta fm.metas; - render_last_changed fm - ] - -and render_mainmatter nodes = - F.mainmatter [] [render_nodes nodes] - -and render_backmatter (addr : addr) = - let opts = - {Sem.default_transclusion_opts with - numbered = false} - in - let render_trees = List.map (render_tree ~opts) in - F.backmatter [] [ - F.contributions [] @@ render_trees @@ E.contributions addr; - F.context [] @@ render_trees @@ E.parents addr; - F.related [] @@ render_trees @@ E.related addr; - F.backlinks [] @@ render_trees @@ E.backlinks addr; - F.references [] @@ render_trees @@ E.bibliography addr - ] - -and render_title ~opts (fm : Sem.frontmatter) = - let ancestors = Ancestors.read () in - let title = - match opts.title_override with - | Some title -> Some title - | None -> - fm.title |> Option.map @@ - Render_util.expand_title_with_parents ~ancestors fm - in - title |> optional @@ fun title -> - F.title [] [render_nodes @@ Sem.sentence_case title] - -and render_tree ?(backmatter = false) ~opts (tree : Sem.tree) = - Current_addr.run ~env:tree.fm.addr @@ fun () -> - let ancestors = Ancestors.read () in - F.register_ns F.tree [ - F.toc opts.toc; - F.numbered opts.numbered; - F.show_heading opts.show_heading; - F.show_metadata opts.show_metadata; - F.expanded opts.expanded; - F.root @@ E.is_root tree.fm.addr - ] [ - render_frontmatter ~opts tree.fm; - begin - match tree.fm.addr with - | addr when List.mem addr ancestors -> - F.mainmatter [] [ - F.prim `P [] [ - F.info [] [txt "Transclusion cycle"] - ] - ] - | addr -> - let cache = Mainmatter_cache.get () in - match Addr_map.find_opt addr cache with - | Some cached -> cached - | None -> - let result = render_mainmatter tree.body in - Mainmatter_cache.modify (Addr_map.add addr result); - result - end; - match backmatter with - | true -> render_backmatter tree.fm.addr - | _ -> F.null [] - ] - -let render_tree_top (tree : Sem.tree) = - Ancestors.run ~env:[] @@ fun () -> - let env = Xmlns_map.assoc ~prefix:F.reserved_prefix ~xmlns:F.forester_xmlns Xmlns_map.empty in - Xmlns_prefixes.run ~env @@ fun () -> - render_tree ~backmatter:true ~opts:Sem.default_transclusion_opts tree - -let with_mainmatter_cache kont = - Mainmatter_cache.run ~init:Addr_map.empty kont diff --git a/lib/render/Serialise_xml_tree.ml b/lib/render/Serialise_xml_tree.ml new file mode 100644 index 0000000..21d4e75 --- /dev/null +++ b/lib/render/Serialise_xml_tree.ml @@ -0,0 +1,169 @@ +open Forester_prelude +open Forester_core + +module X = Xml_tree +module F = Xml_forester +module P = Pure_html + +module String_map = Map.Make (String) + +module Mainmatter_cache = Algaeff.State.Make (struct type t = P.node String_map.t end) + +let render_xml_qname = + function + | X.{prefix = ""; uname; _} -> uname + | X.{prefix; uname; _} -> Format.sprintf "%s:%s" prefix uname + +let render_xml_attr X.{key; value} = + P.string_attr (render_xml_qname key) "%s" value + +let render_date (X.Date date) = + F.date [ + date.href |> F.optional_ @@ F.href "%s" + ] [ + F.year [] "%i" date.year; + date.month |> F.optional @@ F.month [] "%i"; + date.day |> F.optional @@ F.day [] "%i" + ] + +let rec render_tree (X.Tree tree) = + F.tree [ + F.toc tree.options.toc; + F.numbered tree.options.numbered; + F.show_heading tree.options.show_heading; + F.show_metadata tree.options.show_metadata; + F.expanded tree.options.expanded; + F.root tree.options.root; + P.string_attr ("xmlns:" ^ F.reserved_prefix) "%s" F.forester_xmlns + ] [ + render_frontmatter tree.frontmatter; + begin + let cache = Mainmatter_cache.get () in + let key = tree.frontmatter.route in + match String_map.find_opt key cache with + | Some cached -> cached + | None -> + let result = render_mainmatter tree.mainmatter in + Mainmatter_cache.modify (String_map.add key result); + result + end; + tree.backmatter |> F.optional render_backmatter + ] + +and render_frontmatter (fm : _ X.frontmatter) = + F.frontmatter [] [ + F.anchor [] "%s" fm.anchor; + F.addr [] "%s" fm.addr; + F.route [] "%s" fm.route; + fm.title |> Option.map render_content |> F.optional @@ F.title []; + fm.taxon |> F.optional @@ F.taxon [] "%s"; + fm.source_path |> F.optional @@ F.source_path [] "%s"; + fm.dates |> List.map render_date |> F.null; + fm.attributions |> List.map render_attribution_elt |> F.null; + fm.number |> F.optional @@ F.number [] "%s"; + fm.designated_parent |> F.optional @@ F.parent [] "%s"; + fm.metas |> List.map render_meta |> F.null + ] + +and render_mainmatter mm = + F.mainmatter [] @@ render_content mm + +and render_backmatter (bm : _ X.backmatter) = + F.backmatter [] @@ List.map render_backmatter_elt bm + +and render_backmatter_elt = + let render_trees trees = + trees |> List.map @@ fun tree -> + render_tree @@ X.Tree tree + in + function + | Contributions trees -> + F.contributions [] @@ render_trees trees + | Context trees -> + F.context [] @@ render_trees trees + | Related trees -> + F.related [] @@ render_trees trees + | Backlinks trees -> + F.backlinks [] @@ render_trees trees + | References trees -> + F.references [] @@ render_trees trees + +and render_meta (Meta meta) = + F.meta [F.name "%s" meta.key] @@ + render_content meta.body + +and render_attribution_elt = + function + | X.Author x -> + F.author [] @@ render_content x + | X.Contributor x -> + F.contributor [] @@ render_content x + +and render_content (X.Content xs) = + List.map render_content_node xs + + +and render_content_node = + function + | X.Text x -> + P.txt "%s" x + | X.CDATA x -> + P.txt ~raw:true "<![CDATA[%s]]>" x + | X.Prim (p, x) -> + F.prim p [] @@ render_content x + | X.Xml_tag {name; attrs; content} -> + P.std_tag + (render_xml_qname name) + (List.map render_xml_attr attrs) + (render_content content) + | X.Subtree tree -> + render_tree tree + | X.Ref ref -> + F.ref [ + F.addr_ "%s" ref.addr; + F.href "%s" ref.href; + ref.taxon |> F.optional_ @@ F.taxon_ "%s"; + ref.number |> F.optional_ @@ F.number_ "%s" + ] + | X.Link link -> + let type_ = + match link.type_ with + | `Local -> "local" + | `External -> "external" + in + F.link [ + F.type_ "%s" type_; + F.href "%s" link.href; + link.addr |> F.optional_ @@ F.addr_ "%s"; + link.title |> F.optional_ @@ F.title_ "%s" + ] @@ render_content link.content + | X.TeX tex -> + let display = + match tex.display with + | `Inline -> "inline" + | `Block -> "block" + in + F.tex [F.display "%s" display] "<![CDATA[%s]]>" tex.body + | X.Img img -> + F.img [F.src "%s" img.src] + | X.Embedded_tex emb -> + F.embedded_tex [] [ + F.embedded_tex_preamble [] "<![CDATA[%s]]>" emb.preamble; + F.embedded_tex_body [] "<![CDATA[%s]]>" emb.source + ] + | X.Info x -> + F.info [] [P.txt "%s" x] + + +let pp ?stylesheet fmt tree = + Format.fprintf fmt {|<?xml version="1.0" encoding="UTF-8"?>|}; + Format.pp_print_newline fmt (); + begin + stylesheet |> Option.iter @@ fun uri -> + Format.fprintf fmt "<?xml-stylesheet type=\"text/xsl\" href=\"%s\"?>" uri + end; + Format.pp_print_newline fmt (); + P.pp_xml fmt @@ render_tree tree + +let run kont = + Mainmatter_cache.run ~init:String_map.empty kont diff --git a/lib/render/Serialise_xml_tree.mli b/lib/render/Serialise_xml_tree.mli new file mode 100644 index 0000000..0487f56 --- /dev/null +++ b/lib/render/Serialise_xml_tree.mli @@ -0,0 +1,7 @@ +open Forester_core + +val run : (unit -> 'a) -> 'a +(** Initialises a cache for tree mainmatters in the given scope. *) + +val pp : ?stylesheet:string -> Format.formatter -> Xml_tree.tree_ -> unit +(** Must be called within the scope of {!run}. *) \ No newline at end of file diff --git a/lib/render/Xml_forester.ml b/lib/render/Xml_forester.ml index fbb48c2..2d79e95 100644 --- a/lib/render/Xml_forester.ml +++ b/lib/render/Xml_forester.ml @@ -3,14 +3,27 @@ open Pure_html let reserved_prefix = "fr" let forester_xmlns = "http://www.jonmsterling.com/jms-005P.xml" + +let null = HTML.null +let null_ = HTML.null_ + +let optional kont opt = + match opt with + | Some x -> kont x + | None -> null [] + +let optional_ kont opt = + match opt with + | Some x -> kont x + | None -> null_ + + let register_ns tag attrs = let f_xmlns = string_attr ("xmlns:" ^ reserved_prefix) "%s" forester_xmlns in tag (f_xmlns :: attrs) let add_ns name = Format.sprintf "%s:%s" reserved_prefix name -let null = HTML.null -let null_ = HTML.null_ let f_std_tag name = std_tag @@ add_ns name let f_text_tag name = text_tag @@ add_ns name diff --git a/lib/render/Xml_forester.mli b/lib/render/Xml_forester.mli index 6ae97bf..fdc63a9 100644 --- a/lib/render/Xml_forester.mli +++ b/lib/render/Xml_forester.mli @@ -6,6 +6,9 @@ val forester_xmlns : string val register_ns : std_tag -> std_tag +val optional : ('a -> node) -> 'a option -> node +val optional_ : ('a -> attr) -> 'a option -> attr + val null : node list -> node val null_ : attr -- 2.43.4