~jonsterling/forester-devel

This thread contains a patchset. You're looking at the original emails, but you may wish to use the patch review UI. Review patch

[PATCH ocaml-forester] Create intermediate datatype codifying XML format

Details
Message ID
<171871777181.19972.2692963345681738945-0@git.sr.ht>
DKIM signature
missing
Download raw message
Patch: +720 -451
From: Jon Sterling <jon@jonmsterling.com>

Closes: https://todo.sr.ht/~jonsterling/forester/64
---
 lib/core/Forester_core.ml         |   2 +
 lib/core/Xml_tree.ml              | 117 ++++++++
 lib/frontend/Forest.ml            |  11 +-
 lib/render/Compile.ml             | 396 +++++++++++++++++++++++++++
 lib/render/Compile.mli            |   7 +
 lib/render/Render_text.ml         |   1 -
 lib/render/Render_xml.ml          | 441 ------------------------------
 lib/render/Serialise_xml_tree.ml  | 169 ++++++++++++
 lib/render/Serialise_xml_tree.mli |   7 +
 lib/render/Xml_forester.ml        |  17 +-
 lib/render/Xml_forester.mli       |   3 +
 11 files changed, 720 insertions(+), 451 deletions(-)
 create mode 100644 lib/core/Xml_tree.ml
 create mode 100644 lib/render/Compile.ml
 create mode 100644 lib/render/Compile.mli
 delete mode 100644 lib/render/Render_xml.ml
 create mode 100644 lib/render/Serialise_xml_tree.ml
 create mode 100644 lib/render/Serialise_xml_tree.mli

diff --git a/lib/core/Forester_core.ml b/lib/core/Forester_core.ml
index f424d05..e68d0cc 100644
--- a/lib/core/Forester_core.ml
+++ b/lib/core/Forester_core.ml
@@ -16,3 +16,5 @@ module Prim = Prim

module TeX_cs = TeX_cs
module Symbol = Symbol

module Xml_tree = Xml_tree
\ No newline at end of file
diff --git a/lib/core/Xml_tree.ml b/lib/core/Xml_tree.ml
new file mode 100644
index 0000000..9afc002
--- /dev/null
+++ b/lib/core/Xml_tree.ml
@@ -0,0 +1,117 @@
open Forester_prelude

type xml_qname = {
  prefix : string;
  (** The prefix to a qualified XML name; this prefix is expected to be rendered in the scope of a corresponding [xmlns] binding. *)

  uname : string;
  (** The unqualified part of the XML name. *)

  xmlns : string option
  (** The XML namespace bound by the current scope to [prefix]. This is not used when serialising to XML, but can be helpful for other analyses. *)
}

type xml_attr = {key : xml_qname; value : string}

type 'content attribution =
  | Author of 'content
  | Contributor of 'content

type date =
  | Date of {
      href : string option;
      year : int;
      month : int option;
      day : int option
    }

type 'content meta =
  | Meta of {
      key : string;
      body : 'content
    }

type link_type = [`Local | `External]

type ('content, 'tree) content_node =
  | Text of string
  | CDATA of string
  | Xml_tag of {
      name : xml_qname;
      attrs : xml_attr list;
      content : 'content
    }
  | Prim of Prim.t * 'content
  | Subtree of 'tree
  | Ref of {
      addr : string;
      href : string;
      taxon : string option;
      number : string option
    }
  | Link of {
      type_ : link_type;
      href : string;
      title : string option;
      addr : string option;
      content : 'content
    }
  | TeX of {
      display : [`Inline | `Block];
      body : string
    }
  | Img of {src : string}
  | Embedded_tex of {
      preamble : string;
      source : string
    }
  | Info of string


type 'content frontmatter = {
  title : 'content option;
  anchor : string;
  number : string option;
  taxon : string option;
  designated_parent : string option;
  metas : 'content meta list;
  route : string;
  addr : string;
  source_path : string option;
  dates : date list;
  last_changed : date option;
  attributions : 'content attribution list
}

(* TODO: generalise *)
type 'tree backmatter_elt =
  | Contributions of 'tree list
  | Related of 'tree list
  | Backlinks of 'tree list
  | Context of 'tree list
  | References of 'tree list

type 'tree backmatter = 'tree backmatter_elt list

type tree_options = {
  toc : bool;
  numbered : bool;
  show_heading : bool;
  show_metadata : bool;
  expanded : bool;
  root : bool
}

type 'content tree = {
  options : tree_options;
  frontmatter : 'content frontmatter;
  mainmatter : 'content;
  backmatter : 'content tree backmatter option
}

(* Tie the knot *)
type tree_ = Tree of content tree
and content = Content of (content, tree_) content_node list

let splice (Content xs) = xs
let splice_tree (Tree tree) = tree
\ No newline at end of file
diff --git a/lib/frontend/Forest.ml b/lib/frontend/Forest.ml
index da815ef..4b5fdb5 100644
--- a/lib/frontend/Forest.ml
+++ b/lib/frontend/Forest.ml
@@ -264,12 +264,8 @@ let render_tree ~cfg ~cwd (tree : Sem.tree) =
    Eio.Path.with_open_out ~create path @@ fun flow ->
    Eio.Buf_write.with_flow flow @@ fun writer ->
    let fmt = Eio_util.formatter_of_writer writer in
    let node = Render_xml.render_tree_top tree in
    Format.fprintf fmt {|<?xml version="1.0" encoding="UTF-8"?>|};
    Format.pp_print_newline fmt ();
    Format.fprintf fmt "<?xml-stylesheet type=\"text/xsl\" href=\"%s\"?>" cfg.stylesheet;
    Format.pp_print_newline fmt ();
    Pure_html.pp fmt node
    Serialise_xml_tree.pp ~stylesheet:cfg.stylesheet fmt @@
    Compile.compile_tree_top tree
  end

let render_json ~cwd docs =
@@ -320,7 +316,8 @@ let render_trees ~cfg ~forest ~render_only : unit =
  Eio_util.ensure_dir_path cwd ["output"; "resources"];

  run_renderer ~cfg forest @@ fun () ->
  Render_xml.with_mainmatter_cache @@ fun () ->
  Compile.run @@ fun () ->
  Serialise_xml_tree.run @@ fun () ->
  let trees =
    match render_only with
    | None -> forest.trees |> M.to_seq |> Seq.map snd |> List.of_seq
diff --git a/lib/render/Compile.ml b/lib/render/Compile.ml
new file mode 100644
index 0000000..6e89df0
--- /dev/null
+++ b/lib/render/Compile.ml
@@ -0,0 +1,396 @@
open Forester_prelude
open Forester_core

module X = Xml_tree
module F = Xml_forester
module E = Render_effect.Perform

module String_map = Map.Make (String)
module Addr_map = Map.Make (Addr)

module Ancestors = Algaeff.Reader.Make (struct type t = addr list end)
module Current_addr = Algaeff.Reader.Make (struct type t = addr end)
module Mainmatter_cache = Algaeff.State.Make (struct type t = X.content Addr_map.t end)

module Xmlns_map =
struct
  type t =
    {prefix_to_xmlns : string String_map.t;
     xmlns_to_prefixes : string list String_map.t}

  let empty =
    {prefix_to_xmlns = String_map.empty;
     xmlns_to_prefixes = String_map.empty}

  let assoc ~prefix ~xmlns env =
    {prefix_to_xmlns = String_map.add prefix xmlns env.prefix_to_xmlns;
     xmlns_to_prefixes = String_map.add_to_list xmlns prefix env.xmlns_to_prefixes}
end

module Xmlns_prefixes = Algaeff.Reader.Make (Xmlns_map)

let get_xmlns_for_prefix prefix =
  let env = Xmlns_prefixes.read ()  in
  String_map.find_opt prefix env.prefix_to_xmlns

let rec normalise_prefix ?loc ~prefix ~xmlns kont =
  match xmlns with
  | Some xmlns ->
    begin
      let open Xmlns_map in
      let env = Xmlns_prefixes.read () in
      let exception Shadowing in
      try
        begin
          match
            String_map.find_opt prefix env.prefix_to_xmlns,
            String_map.find_opt xmlns env.xmlns_to_prefixes
          with
          | None, (None | Some []) ->
            let env = assoc ~prefix ~xmlns env in
            Xmlns_prefixes.run ~env @@ fun () ->
            kont @@ ([(prefix, xmlns)], prefix)
          | Some xmlns', Some prefixes ->
            if xmlns' = xmlns && List.mem prefix prefixes then
              kont ([], prefix)
            else
              raise Shadowing
          | _, Some (prefix' :: _) ->
            kont ([], prefix')
          | Some xmlns', None ->
            raise Shadowing
        end
      with Shadowing ->
        normalise_prefix ?loc ~prefix:(prefix ^ "_") ~xmlns:(Some xmlns) kont
    end
  | _ ->
    kont ([], prefix)


let compile_date (date : Date.t) =
  let date_addr = User_addr (Format.asprintf "%a" Date.pp date) in
  let href = E.get_doc date_addr |> Option.map @@ fun _doc -> E.route date_addr in
  let year = Date.year date in
  let month = Date.month date in
  let day = Date.day date in
  X.Date {href; year; month; day}

let compile_dates = List.map compile_date

let rec compile_located (located : Sem.node Range.located) =
  match located.value with
  | Sem.Text txt ->
    [X.Text txt]
  | Sem.Verbatim txt ->
    [X.CDATA txt]
  | Sem.Prim (p, xs) ->
    compile_prim p xs
  | Sem.Math (mode, xs) ->
    let body =
      let module TP = Render_TeX_like.Printer in
      Str.global_replace (Str.regexp "\n") " " @@
      TP.contents @@ Render_TeX_like.render ~cfg:{tex = false} xs
    in
    let display =
      match mode with
      | Inline -> `Inline
      | Display -> `Block
    in
    [X.TeX {display; body}]

  | Sem.Link (addr, title, modifier) ->
    begin
      match E.get_doc addr with
      | Some tree ->
        compile_internal_link ~title ~modifier ~addr ~dest:tree
      | None ->
        let url = Format.asprintf "%a" pp_addr addr in
        compile_external_link ~title ~modifier ~url
    end

  | Sem.Ref addr ->
    begin
      match E.get_doc addr with
      | None ->
        Reporter.fatalf ?loc:located.loc Tree_not_found "could not find tree at address `%a` for reference" pp_addr addr
      | Some tree ->
        let href = E.route addr in
        let addr = Format.asprintf "%a" pp_addr addr in
        let taxon = tree.fm.taxon |> Option.map String_util.sentence_case in
        let number = tree.fm.number in
        [X.Ref {addr; taxon; href; number}]
    end

  | Sem.Img path ->
    [X.Img {src = path}]

  | Sem.If_tex (_, xs) ->
    X.splice @@ compile_nodes xs

  | Sem.Xml_tag (name, attrs, xs) ->
    let rec fold_attrs tag_prefix updates acc attrs  =
      match attrs with
      | [] ->
        let xmlns_attrs =
          updates |> List.map @@ fun (prefix, xmlns) ->
          X.{key =
               X.{prefix = "xmlns";
                  uname = prefix;
                  xmlns = None};
             value = xmlns}
        in
        let name = X.{prefix = tag_prefix; uname = name.uname; xmlns = get_xmlns_for_prefix tag_prefix} in
        let attrs = xmlns_attrs @ List.rev acc in
        let content = compile_nodes xs in
        X.Xml_tag {name; attrs; content}

      | (k, v) :: attrs ->
        normalise_prefix ?loc:located.loc ~prefix:k.prefix ~xmlns:k.xmlns @@ fun (updates', prefix) ->
        let xml_attr =
          X.{key = X.{prefix; uname = k.uname; xmlns = None};
             value = Render_text.Printer.contents @@ Render_text.render v}
        in
        fold_attrs tag_prefix (updates @ updates') (xml_attr :: acc) attrs
    in

    [normalise_prefix ~prefix:name.prefix ~xmlns:name.xmlns @@ fun (updates, tag_prefix) ->
     fold_attrs tag_prefix updates [] attrs]

  | Sem.TeX_cs name ->
    Reporter.fatalf ?loc:located.loc Resolution_error
      "unresolved TeX control sequence `\\%a`" TeX_cs.pp name

  | Sem.Object _ ->
    Reporter.fatal ?loc:located.loc Type_error
      "tried to compile object closure to XML"

  | Sem.Embed_tex {preamble; source} ->
    let as_tex x =
      Render_TeX_like.Printer.contents @@
      Render_TeX_like.render ~cfg:{tex = true} x
    in
    let preamble = as_tex preamble in
    let source = as_tex source in
    let hash = Digest.to_hex @@ Digest.string @@ preamble ^ source in
    (* TODO: the following should be done during evaluation! *)
    E.enqueue_latex ~name:hash ~preamble ~source;
    [X.Embedded_tex {preamble; source}]

  | Sem.Transclude (opts, addr) ->
    begin
      match E.get_doc addr with
      | None ->
        Reporter.fatalf ?loc:located.loc Tree_not_found "could not find tree at address `%a` for transclusion" pp_addr addr
      | Some doc ->
        compile_transclusion ~opts doc
    end

  | Sem.Subtree (opts, subtree) ->
    compile_transclusion ~opts subtree

  | Sem.Query (opts, query) ->
    let trees = E.run_query query in
    begin
      match trees with
      | [] ->
        [X.Prim (`P, X.Content [X.Info "Transclusion cycle"])]
      | _ ->
        trees |> List.concat_map @@ fun tree ->
        let opts = Sem.{expanded = false; show_heading = true; title_override = None; taxon_override = None; toc = false; numbered = false; show_metadata = true} in
        compile_transclusion ~opts tree
    end

and compile_transclusion ~opts (tree : Sem.tree) =
  let current = Current_addr.read () in
  let update old_ancestors = current :: old_ancestors in
  Ancestors.scope update @@ fun () ->
  [X.Subtree (compile_tree ~opts tree)]

and compile_title ~(opts : Sem.transclusion_opts) (fm : Sem.frontmatter) =
  let ancestors = Ancestors.read () in
  let title =
    match opts.title_override with
    | Some title -> Some title
    | None ->
      fm.title |> Option.map @@
      Render_util.expand_title_with_parents ~ancestors fm
  in
  title |> Option.map @@ fun title ->
  compile_nodes @@ Sem.sentence_case title

and compile_attributions ~contributors ~authors =
  match authors, contributors with
  | [], [] -> []
  | _ ->
    List.map compile_author authors @ List.map compile_contributor contributors

and compile_author author =
  X.Author (compile_attribution_inner author)

and compile_contributor author =
  X.Contributor (compile_attribution_inner author)

and compile_attribution_inner author =
  let exception Untitled in
  try
    match E.get_doc author with
    | None -> raise Untitled
    | Some biotree ->
      let href = E.route biotree.fm.addr in
      let content =
        match biotree.fm.title with
        | None -> raise Untitled
        | Some title -> compile_nodes title
      in
      let title = biotree.fm.title |> Option.map Sem.string_of_nodes in
      let addr = Option.some @@ Format.asprintf "%a" pp_addr author in
      X.Content [X.Link {type_ = `Local; href; title; addr; content}]
  with Untitled ->
    let name = Format.asprintf "%a" pp_addr author in
    X.Content [X.Text name]


and compile_meta (key, body) =
  let body = compile_nodes body in
  X.Meta {key; body}

and compile_frontmatter ~opts (fm : Sem.frontmatter)  =
  let anchor = string_of_int @@ Oo.id (object end) in
  let title = compile_title ~opts fm in
  let number = fm.number in
  let taxon =
    Option.map String_util.sentence_case @@
    match opts.taxon_override with
    | Some taxon -> Some taxon
    | None -> fm.taxon
  in
  let route = E.route fm.addr in
  let source_path = fm.source_path in
  let addr = Format.asprintf "%a" pp_addr fm.addr in
  let designated_parent =
    fm.designated_parent |> Option.map @@ fun addr ->
    Format.asprintf "%a" pp_addr addr
  in
  let dates = compile_dates fm.dates in
  let attributions = compile_attributions ~contributors:(E.contributors fm.addr) ~authors:fm.authors in
  let last_changed = E.last_changed fm.addr |> Option.map compile_date in
  let metas = fm.metas |> List.map compile_meta in
  X.{title;
     anchor;
     number;
     taxon;
     designated_parent;
     metas;
     route;
     addr;
     source_path;
     dates;
     last_changed;
     attributions}

and compile_tree ?(backmatter = false) ~opts (tree : Sem.tree) =
  Current_addr.run ~env:tree.fm.addr @@ fun  () ->
  let ancestors = Ancestors.read () in
  let options =
    X.{toc = opts.toc;
       numbered = opts.numbered;
       show_heading = opts.show_heading;
       show_metadata = opts.show_metadata;
       expanded = opts.expanded;
       root = E.is_root tree.fm.addr}
  in
  let frontmatter = compile_frontmatter ~opts tree.fm in
  let mainmatter =
    begin
      match tree.fm.addr with
      | addr when List.mem addr ancestors ->
        X.Content [X.Prim (`P, X.Content [X.Info "Transclusion cycle"])]
      | addr ->
        let cache = Mainmatter_cache.get () in
        match Addr_map.find_opt addr cache with
        | Some cached -> cached
        | None ->
          let result = compile_nodes tree.body in
          Mainmatter_cache.modify @@ Addr_map.add addr result;
          result
    end;

  in
  let backmatter =
    if backmatter then
      Some (compile_backmatter tree.fm.addr)
    else
      None
  in
  X.Tree {options; frontmatter; mainmatter; backmatter}

and compile_backmatter addr =
  let opts = {Sem.default_transclusion_opts with numbered = false} in
  let compile_trees =
    List.map @@ fun tree ->
    X.splice_tree @@ compile_tree ~opts tree
  in
  let contributions = compile_trees @@ E.contributions addr in
  let context = compile_trees @@ E.parents addr in
  let related = compile_trees @@ E.related addr in
  let backlinks = compile_trees @@ E.backlinks addr in
  let references = compile_trees @@ E.bibliography addr in
  [X.Contributions contributions;
   X.Context context;
   X.Related related;
   X.Backlinks backlinks;
   X.References references]

and compile_internal_link ~title ~modifier ~addr ~dest =
  let href = E.route addr in
  let ancestors = Ancestors.read () in
  let dest_title =
    dest.fm.title |> Option.map @@
    Render_util.expand_title_with_parents ~ancestors dest.fm
  in
  let content =
    title
    |> Option.fold ~none:dest_title ~some:Option.some
    |> Option.map (Sem.apply_modifier modifier)
    |> Option.value ~default:[Range.locate_opt None @@ Sem.Text "Untitled"]
    |> compile_nodes
  in
  let title =
    match dest_title with
    | None -> None
    | Some t ->
      let title_string =
        String_util.sentence_case @@
        Render_text.Printer.contents @@
        Render_text.render t
      in
      Some title_string
  in
  let addr = Some (Format.asprintf "%a" pp_addr addr) in
  [X.Link {type_ = `Local; href; title; content; addr}]

and compile_external_link ~title ~modifier ~url =
  let href = url in
  let content =
    title
    |> Option.map (Sem.apply_modifier modifier)
    |> Option.value ~default:[Range.locate_opt None @@ Sem.Text url]
    |> compile_nodes
  in
  [X.Link {type_ = `External; href; content; title = None; addr = None}];

and compile_nodes (xs : Sem.t) = X.Content (List.concat_map compile_located xs)

and compile_prim p xs =
  let content = compile_nodes xs in
  [X.Prim (p, content)]

let compile_tree_top tree =
  Ancestors.run ~env:[] @@ fun () ->
  let env = Xmlns_map.assoc ~prefix:F.reserved_prefix ~xmlns:F.forester_xmlns Xmlns_map.empty in
  Xmlns_prefixes.run ~env @@ fun () ->
  compile_tree ~backmatter:true ~opts:Sem.default_transclusion_opts tree


let run kont =
  Mainmatter_cache.run ~init:Addr_map.empty kont
diff --git a/lib/render/Compile.mli b/lib/render/Compile.mli
new file mode 100644
index 0000000..ef8f7e5
--- /dev/null
+++ b/lib/render/Compile.mli
@@ -0,0 +1,7 @@
open Forester_core

val run : (unit -> 'a) -> 'a
(** Initialises a cache for tree mainmatters in the given scope. *)

val compile_tree_top : Sem.tree -> Xml_tree.tree_
(** Must be called in the scope of {!run}. *)
diff --git a/lib/render/Render_text.ml b/lib/render/Render_text.ml
index 2a1e868..8f8e988 100644
--- a/lib/render/Render_text.ml
+++ b/lib/render/Render_text.ml
@@ -18,7 +18,6 @@ struct
    Format.asprintf "%a" (fun fmt _ -> printer fmt) ()
end


let rec render_node : Sem.node Range.located -> Printer.t =
  fun node ->
  match node.value with
diff --git a/lib/render/Render_xml.ml b/lib/render/Render_xml.ml
deleted file mode 100644
index 92d1c31..0000000
--- a/lib/render/Render_xml.ml
@@ -1,441 +0,0 @@
open Forester_core
open Forester_prelude

open Pure_html

module E = Render_effect.Perform
module F = Xml_forester

module String_map = Map.Make (String)
module Addr_map = Map.Make (Addr)

module Ancestors = Algaeff.Reader.Make (struct type t = addr list end)
module Current_addr = Algaeff.Reader.Make (struct type t = addr end)
module Mainmatter_cache = Algaeff.State.Make (struct type t = node Addr_map.t end)

module Xmlns_map =
struct
  type t =
    {prefix_to_xmlns : string String_map.t;
     xmlns_to_prefixes : string list String_map.t}

  let empty =
    {prefix_to_xmlns = String_map.empty;
     xmlns_to_prefixes = String_map.empty}

  let assoc ~prefix ~xmlns env =
    {prefix_to_xmlns = String_map.add prefix xmlns env.prefix_to_xmlns;
     xmlns_to_prefixes = String_map.add_to_list xmlns prefix env.xmlns_to_prefixes}
end

module Xmlns_prefixes = Algaeff.Reader.Make (Xmlns_map)

let rec normalise_prefix ?loc ~prefix ~xmlns kont =
  match xmlns with
  | Some xmlns ->
    begin
      let open Xmlns_map in
      let env = Xmlns_prefixes.read () in
      let exception Shadowing in
      try
        begin
          match
            String_map.find_opt prefix env.prefix_to_xmlns,
            String_map.find_opt xmlns env.xmlns_to_prefixes
          with
          | None, (None | Some []) ->
            let env = assoc ~prefix ~xmlns env in
            Xmlns_prefixes.run ~env @@ fun () ->
            kont @@ ([(prefix, xmlns)], prefix)
          | Some xmlns', Some prefixes ->
            if xmlns' = xmlns && List.mem prefix prefixes then
              kont ([], prefix)
            else
              raise Shadowing
          | _, Some (prefix' :: _) ->
            kont ([], prefix')
          | Some xmlns', None ->
            raise Shadowing
        end
      with Shadowing ->
        normalise_prefix ?loc ~prefix:(prefix ^ "_") ~xmlns:(Some xmlns) kont
    end
  | _ ->
    kont ([], prefix)

let optional kont opt =
  match opt with
  | Some x -> kont x
  | None -> F.null []

let optional_ kont opt =
  match opt with
  | Some x -> kont x
  | None -> F.null_

let render_date (date : Date.t) =
  let date_addr = User_addr (Format.asprintf "%a" Date.pp date) in
  F.date [
    E.get_doc date_addr |> optional_ @@ fun _ ->
    F.href "%s" @@ E.route date_addr
  ] [
    F.year [] "%i" (Date.year date);
    Date.month date |> optional @@ F.month [] "%i";
    Date.day date |> optional @@ F.day [] "%i"
  ]

let render_dates (dates : Date.t list) =
  F.null @@ List.map render_date dates


let rec render_located (located : Sem.node Range.located) =
  match located.value with
  | Sem.Prim (p, x) ->
    F.prim p [] [render_nodes x]

  | Sem.Text text ->
    txt "%s" text

  | Sem.Verbatim cdata ->
    txt ~raw:true "<![CDATA[%s]]>" cdata

  | Sem.Math (mode, body) ->
    let rendered =
      let module TP = Render_TeX_like.Printer in
      Str.global_replace (Str.regexp "\n") " " @@
      TP.contents @@ Render_TeX_like.render ~cfg:{tex = false} body
    in
    F.tex [
      match mode with
      | Inline -> F.null_
      | Display -> F.display "block"
    ] "<![CDATA[%s]]>" rendered

  | Sem.Link (addr, title, modifier) ->
    begin
      match E.get_doc addr with
      | Some tree ->
        render_internal_link ~title ~modifier ~addr ~dest:tree
      | None ->
        let url = Format.asprintf "%a" pp_addr addr in
        render_external_link ~title ~modifier ~url
    end

  | Sem.Ref addr ->
    begin
      match E.get_doc addr with
      | None ->
        Reporter.fatalf ?loc:located.loc Tree_not_found "could not find tree at address `%a` for reference" pp_addr addr
      | Some tree ->
        let url = E.route addr in
        F.ref [
          F.addr_ "%s" (Format.asprintf "%a" pp_addr addr);
          F.href "%s" url;
          tree.fm.taxon |> Option.map String_util.sentence_case |> optional_ @@ F.taxon_ "%s";
          tree.fm.number |> optional_ @@ F.number_ "%s"
        ]
    end

  | Sem.Img path ->
    F.img [F.src "%s" path]

  | Sem.If_tex (_, x) ->
    render_nodes x

  | Sem.Xml_tag (name, attrs, xs) ->

    let rec fold_attrs tag_prefix updates acc attrs  =
      match attrs with
      | [] ->
        let xmlns_attrs =
          updates |> List.map @@ fun (prefix, xmlns) ->
          string_attr ("xmlns:" ^ prefix) "%s" xmlns
        in
        let tag_name =
          match tag_prefix with
          | "" -> name.uname
          | prefix -> prefix ^ ":" ^ name.uname
        in
        std_tag
          tag_name
          (xmlns_attrs @ List.rev acc)
          [render_nodes xs]

      | (k, v) :: attrs ->
        normalise_prefix ?loc:located.loc ~prefix:k.prefix ~xmlns:k.xmlns @@ fun (updates', prefix) ->
        let xml_attr =
          let name =
            match prefix with
            | "" -> k.uname
            | _ -> prefix ^ ":" ^ k.uname
          in
          string_attr name "%s" @@
          Render_text.Printer.contents @@
          Render_text.render v
        in
        fold_attrs tag_prefix (updates @ updates') (xml_attr :: acc) attrs
    in

    normalise_prefix ~prefix:name.prefix ~xmlns:name.xmlns @@ fun (updates, tag_prefix) ->
    fold_attrs tag_prefix updates [] attrs

  | Sem.TeX_cs name ->
    Reporter.fatalf ?loc:located.loc Resolution_error
      "unresolved TeX control sequence `\\%a`" TeX_cs.pp name

  | Sem.Object _ ->
    Reporter.fatal ?loc:located.loc Type_error
      "tried to render object closure to XML"

  | Sem.Embed_tex {preamble; source} ->
    let as_tex x =
      Render_TeX_like.Printer.contents @@
      Render_TeX_like.render ~cfg:{tex = true} x
    in
    let preamble = as_tex preamble in
    let source = as_tex source in
    let hash = Digest.to_hex @@ Digest.string @@ preamble ^ source in
    E.enqueue_latex ~name:hash ~preamble ~source;
    F.embedded_tex [F.hash "%s" hash] [
      F.embedded_tex_preamble [] "<![CDATA[%s]]>" preamble;
      F.embedded_tex_body [] "<![CDATA[%s]]>" source
    ]

  | Sem.Transclude (opts, addr) ->
    begin
      match E.get_doc addr with
      | None ->
        Reporter.fatalf ?loc:located.loc Tree_not_found "could not find tree at address `%a` for transclusion" pp_addr addr
      | Some doc ->
        render_transclusion ~opts doc
    end

  | Sem.Subtree (opts, subtree) ->
    render_transclusion ~opts subtree

  | Sem.Query (opts, query) ->
    let trees = E.run_query query in
    match trees with
    | [] ->
      F.prim `P [] [
        F.info [] [txt "Query returned no results"]
      ]
    | _ ->
      render_nodes begin
        trees |> List.map @@ fun (tree : Sem.tree) ->
        let addr = tree.fm.addr in
        let opts = Sem.{expanded = false; show_heading = true; title_override = None; taxon_override = None; toc = false; numbered = false; show_metadata = true} in
        Range.locate_opt None @@ Sem.Transclude (opts, addr)
      end

and render_nodes nodes =
  F.null @@ List.map render_located nodes


and render_transclusion ~opts (tree : Sem.tree) =
  let current = Current_addr.read () in
  let update old_ancestors = current :: old_ancestors in
  Ancestors.scope update @@ fun () ->
  render_tree ~opts tree

and render_internal_link ~title ~modifier ~addr ~dest =
  let url = E.route addr in
  let ancestors = Ancestors.read () in
  let dest_title =
    dest.fm.title |> Option.map @@
    Render_util.expand_title_with_parents ~ancestors dest.fm
  in
  let target_title_attr =
    match dest_title with
    | None -> F.null_
    | Some t ->
      let title_string =
        String_util.sentence_case @@
        Render_text.Printer.contents @@
        Render_text.render t
      in
      F.title_ "%s" title_string
  in
  let title =
    title
    |> Option.fold ~none:dest_title ~some:Option.some
    |> Option.map (Sem.apply_modifier modifier)
    |> Option.value ~default:[Range.locate_opt None @@ Sem.Text "Untitled"]
  in
  F.link [
    F.href "%s" url;
    F.type_ "local";
    F.addr_ "%s" (Format.asprintf "%a" pp_addr addr);
  ] [render_nodes title]

and render_external_link ~title ~modifier ~url =
  let title =
    title
    |> Option.map (Sem.apply_modifier modifier)
    |> Option.value ~default:[Range.locate_opt None @@ Sem.Text url]
  in
  F.link [
    F.href "%s" url;
    F.type_ "external"
  ] [render_nodes title]

and render_author_name author =
  let exception Untitled in
  try
    match E.get_doc author with
    | None -> raise Untitled
    | Some biotree ->
      let addr = biotree.fm.addr in
      let url = E.route addr in
      F.link [
        F.href "%s" url;
        F.type_ "local";
        F.addr_ "%s" (Format.asprintf "%a" pp_addr addr)
      ] [
        match biotree.fm.title with
        | None -> raise Untitled
        | Some title -> render_nodes title
      ]
  with Untitled ->
    txt "%s" (Format.asprintf "%a" pp_addr author)

and render_author author =
  F.author [] [render_author_name author]

and render_contributor contributor =
  F.contributor [] [render_author_name contributor]

and render_authors ~contributors ~authors =
  match authors, contributors with
  | [], [] -> F.null []
  | _, _ ->
    F.authors [] [
      F.null @@ List.map render_author authors;
      F.null @@ List.map render_contributor contributors
    ]

and render_meta (key, body) =
  F.meta [F.name "%s" key] [
    render_nodes body
  ]

and render_last_changed (fm : Sem.frontmatter) =
  let addr = fm.addr in
  let date = E.last_changed addr in
  date |> optional @@ fun date -> F.last_changed [] [render_date date]

and render_frontmatter ~opts (fm : Sem.frontmatter) =
  let anchor = string_of_int @@ Oo.id (object end) in
  let contributors = E.contributors fm.addr in
  let authors = fm.authors in

  F.frontmatter [] [
    F.anchor [] "%s" anchor;

    begin
      let taxon =
        match Sem.(opts.taxon_override) with
        | Some taxon -> Some taxon
        | None -> fm.taxon
      in
      match taxon with
      | None -> F.null []
      | Some taxon -> F.taxon [] "%s" @@ String_util.sentence_case taxon
    end;

    begin
      let addr = fm.addr in
      F.null [
        F.addr [] "%s" (Format.asprintf "%a" pp_addr addr);
        F.route [] "%s" @@ E.route addr
      ]
    end;

    begin
      fm.source_path |> optional @@ fun path ->
      F.source_path [] "%s" path
    end;

    render_title ~opts fm;
    render_dates fm.dates;
    render_authors ~contributors ~authors;
    fm.number |> optional @@ F.number [] "%s";
    begin
      fm.designated_parent |> optional @@ fun addr ->
      F.parent [] "%s" (Format.asprintf "%a" pp_addr addr)
    end;
    F.null @@ List.map render_meta fm.metas;
    render_last_changed fm
  ]

and render_mainmatter nodes =
  F.mainmatter [] [render_nodes nodes]

and render_backmatter (addr : addr) =
  let opts =
    {Sem.default_transclusion_opts with
     numbered = false}
  in
  let render_trees = List.map (render_tree ~opts) in
  F.backmatter [] [
    F.contributions [] @@ render_trees @@ E.contributions addr;
    F.context [] @@ render_trees @@ E.parents addr;
    F.related [] @@ render_trees @@ E.related addr;
    F.backlinks [] @@ render_trees @@ E.backlinks addr;
    F.references [] @@ render_trees @@ E.bibliography addr
  ]

and render_title ~opts (fm : Sem.frontmatter) =
  let ancestors = Ancestors.read () in
  let title =
    match opts.title_override with
    | Some title -> Some title
    | None ->
      fm.title |> Option.map @@
      Render_util.expand_title_with_parents ~ancestors fm
  in
  title |> optional @@ fun title ->
  F.title [] [render_nodes @@ Sem.sentence_case title]

and render_tree ?(backmatter = false) ~opts (tree : Sem.tree) =
  Current_addr.run ~env:tree.fm.addr @@ fun  () ->
  let ancestors = Ancestors.read () in
  F.register_ns F.tree [
    F.toc opts.toc;
    F.numbered opts.numbered;
    F.show_heading opts.show_heading;
    F.show_metadata opts.show_metadata;
    F.expanded opts.expanded;
    F.root @@ E.is_root tree.fm.addr
  ] [
    render_frontmatter ~opts tree.fm;
    begin
      match tree.fm.addr with
      | addr when List.mem addr ancestors ->
        F.mainmatter [] [
          F.prim `P [] [
            F.info [] [txt "Transclusion cycle"]
          ]
        ]
      | addr ->
        let cache = Mainmatter_cache.get () in
        match Addr_map.find_opt addr cache with
        | Some cached -> cached
        | None ->
          let result = render_mainmatter tree.body in
          Mainmatter_cache.modify (Addr_map.add addr result);
          result
    end;
    match backmatter with
    | true -> render_backmatter tree.fm.addr
    | _ -> F.null []
  ]

let render_tree_top (tree : Sem.tree) =
  Ancestors.run ~env:[] @@ fun () ->
  let env = Xmlns_map.assoc ~prefix:F.reserved_prefix ~xmlns:F.forester_xmlns Xmlns_map.empty in
  Xmlns_prefixes.run ~env @@ fun () ->
  render_tree ~backmatter:true ~opts:Sem.default_transclusion_opts tree

let with_mainmatter_cache kont =
  Mainmatter_cache.run ~init:Addr_map.empty kont
diff --git a/lib/render/Serialise_xml_tree.ml b/lib/render/Serialise_xml_tree.ml
new file mode 100644
index 0000000..21d4e75
--- /dev/null
+++ b/lib/render/Serialise_xml_tree.ml
@@ -0,0 +1,169 @@
open Forester_prelude
open Forester_core

module X = Xml_tree
module F = Xml_forester
module P = Pure_html

module String_map = Map.Make (String)

module Mainmatter_cache = Algaeff.State.Make (struct type t = P.node String_map.t end)

let render_xml_qname =
  function
  | X.{prefix = ""; uname; _} -> uname
  | X.{prefix; uname; _} -> Format.sprintf "%s:%s" prefix uname

let render_xml_attr X.{key; value} =
  P.string_attr (render_xml_qname key) "%s" value

let render_date (X.Date date) =
  F.date [
    date.href |> F.optional_ @@ F.href "%s"
  ] [
    F.year [] "%i" date.year;
    date.month |> F.optional @@ F.month [] "%i";
    date.day |> F.optional @@ F.day [] "%i"
  ]

let rec render_tree (X.Tree tree) =
  F.tree [
    F.toc tree.options.toc;
    F.numbered tree.options.numbered;
    F.show_heading tree.options.show_heading;
    F.show_metadata tree.options.show_metadata;
    F.expanded tree.options.expanded;
    F.root tree.options.root;
    P.string_attr ("xmlns:" ^ F.reserved_prefix) "%s" F.forester_xmlns
  ] [
    render_frontmatter tree.frontmatter;
    begin
      let cache = Mainmatter_cache.get () in
      let key = tree.frontmatter.route in
      match String_map.find_opt key cache with
      | Some cached -> cached
      | None ->
        let result = render_mainmatter tree.mainmatter in
        Mainmatter_cache.modify (String_map.add key result);
        result
    end;
    tree.backmatter |> F.optional render_backmatter
  ]

and render_frontmatter (fm : _ X.frontmatter) =
  F.frontmatter [] [
    F.anchor [] "%s" fm.anchor;
    F.addr [] "%s" fm.addr;
    F.route [] "%s" fm.route;
    fm.title |> Option.map render_content |> F.optional @@ F.title [];
    fm.taxon |> F.optional @@ F.taxon [] "%s";
    fm.source_path |> F.optional @@ F.source_path [] "%s";
    fm.dates |> List.map render_date |> F.null;
    fm.attributions |> List.map render_attribution_elt |> F.null;
    fm.number |> F.optional @@ F.number [] "%s";
    fm.designated_parent |> F.optional @@ F.parent [] "%s";
    fm.metas |> List.map render_meta |> F.null
  ]

and render_mainmatter mm =
  F.mainmatter [] @@ render_content mm

and render_backmatter (bm : _ X.backmatter)  =
  F.backmatter [] @@ List.map render_backmatter_elt bm

and render_backmatter_elt =
  let render_trees trees =
    trees |> List.map @@ fun tree ->
    render_tree @@ X.Tree tree
  in
  function
  | Contributions trees ->
    F.contributions [] @@ render_trees trees
  | Context trees ->
    F.context [] @@ render_trees trees
  | Related trees ->
    F.related [] @@ render_trees trees
  | Backlinks trees ->
    F.backlinks [] @@ render_trees trees
  | References trees ->
    F.references [] @@ render_trees trees

and render_meta (Meta meta) =
  F.meta [F.name "%s" meta.key] @@
  render_content meta.body

and render_attribution_elt =
  function
  | X.Author x ->
    F.author [] @@ render_content x
  | X.Contributor x ->
    F.contributor [] @@ render_content x

and render_content (X.Content xs) =
  List.map render_content_node xs


and render_content_node =
  function
  | X.Text x ->
    P.txt "%s" x
  | X.CDATA x ->
    P.txt ~raw:true "<![CDATA[%s]]>" x
  | X.Prim (p, x) ->
    F.prim p [] @@ render_content x
  | X.Xml_tag {name; attrs; content} ->
    P.std_tag
      (render_xml_qname name)
      (List.map render_xml_attr attrs)
      (render_content content)
  | X.Subtree tree ->
    render_tree tree
  | X.Ref ref ->
    F.ref [
      F.addr_ "%s" ref.addr;
      F.href "%s" ref.href;
      ref.taxon |> F.optional_ @@ F.taxon_ "%s";
      ref.number |> F.optional_ @@ F.number_ "%s"
    ]
  | X.Link link ->
    let type_ =
      match link.type_ with
      | `Local -> "local"
      | `External -> "external"
    in
    F.link [
      F.type_ "%s" type_;
      F.href "%s" link.href;
      link.addr |> F.optional_ @@ F.addr_ "%s";
      link.title |> F.optional_ @@ F.title_ "%s"
    ] @@ render_content link.content
  | X.TeX tex ->
    let display =
      match tex.display with
      | `Inline -> "inline"
      | `Block -> "block"
    in
    F.tex [F.display "%s" display] "<![CDATA[%s]]>" tex.body
  | X.Img img ->
    F.img [F.src "%s" img.src]
  | X.Embedded_tex emb ->
    F.embedded_tex [] [
      F.embedded_tex_preamble [] "<![CDATA[%s]]>" emb.preamble;
      F.embedded_tex_body [] "<![CDATA[%s]]>" emb.source
    ]
  | X.Info x ->
    F.info [] [P.txt "%s" x]


let pp ?stylesheet fmt tree =
  Format.fprintf fmt {|<?xml version="1.0" encoding="UTF-8"?>|};
  Format.pp_print_newline fmt ();
  begin
    stylesheet |> Option.iter @@ fun uri ->
    Format.fprintf fmt "<?xml-stylesheet type=\"text/xsl\" href=\"%s\"?>" uri
  end;
  Format.pp_print_newline fmt ();
  P.pp_xml fmt @@ render_tree tree

let run kont =
  Mainmatter_cache.run ~init:String_map.empty kont
diff --git a/lib/render/Serialise_xml_tree.mli b/lib/render/Serialise_xml_tree.mli
new file mode 100644
index 0000000..0487f56
--- /dev/null
+++ b/lib/render/Serialise_xml_tree.mli
@@ -0,0 +1,7 @@
open Forester_core

val run : (unit -> 'a) -> 'a
(** Initialises a cache for tree mainmatters in the given scope. *)

val pp : ?stylesheet:string -> Format.formatter -> Xml_tree.tree_ -> unit
(** Must be called within the scope of {!run}. *)
\ No newline at end of file
diff --git a/lib/render/Xml_forester.ml b/lib/render/Xml_forester.ml
index fbb48c2..2d79e95 100644
--- a/lib/render/Xml_forester.ml
+++ b/lib/render/Xml_forester.ml
@@ -3,14 +3,27 @@ open Pure_html
let reserved_prefix = "fr"
let forester_xmlns = "http://www.jonmsterling.com/jms-005P.xml"


let null = HTML.null
let null_ = HTML.null_

let optional kont opt =
  match opt with
  | Some x -> kont x
  | None -> null []

let optional_ kont opt =
  match opt with
  | Some x -> kont x
  | None -> null_


let register_ns tag attrs =
  let f_xmlns = string_attr ("xmlns:" ^ reserved_prefix) "%s" forester_xmlns in
  tag (f_xmlns :: attrs)

let add_ns name = Format.sprintf "%s:%s" reserved_prefix name

let null = HTML.null
let null_ = HTML.null_

let f_std_tag name = std_tag @@ add_ns name
let f_text_tag name = text_tag @@ add_ns name
diff --git a/lib/render/Xml_forester.mli b/lib/render/Xml_forester.mli
index 6ae97bf..fdc63a9 100644
--- a/lib/render/Xml_forester.mli
+++ b/lib/render/Xml_forester.mli
@@ -6,6 +6,9 @@ val forester_xmlns : string

val register_ns : std_tag -> std_tag

val optional : ('a -> node) -> 'a option -> node
val optional_ : ('a -> attr) -> 'a option -> attr

val null : node list -> node
val null_ : attr

-- 
2.43.4
Reply to thread Export thread (mbox)