~jonsterling/forester-devel

This thread contains a patchset. You're looking at the original emails, but you may wish to use the patch review UI. Review patch
3 2

[PATCH] basic bibtex import support

Details
Message ID
<874j2ls3i3.fsf@owenlynch.org>
DKIM signature
pass
Download raw message
Patch: +94 -6
Hello all,

I've written a basic bibtex importer. It's not perfect, but I think that
it's fine if the generated trees take a small amount of manual editing
before they compile correctly; this still saves a lot of typing!

One notable limitation is that this only works with .bib files
containing precisely one entry. This is the format that the .bib files
generated by [pubs](https://github.com/pubs/pubs) have, so this is
extremely useful for me, but may be less useful for people who have .bib
files with many entries.

Any suggestions for how to design this better are more than welcome.

-Owen

---
 .gitignore                    |  3 ++-
 bin/forester/main.ml          | 21 +++++++++++++++++---
 dune-project                  |  4 +++-
 forester.opam                 |  1 +
 lib/core/Reporter.ml          |  4 ++++
 lib/frontend/Bibtex_import.ml | 36 +++++++++++++++++++++++++++++++++++
 lib/frontend/Forester.ml      | 22 +++++++++++++++++++++
 lib/frontend/Forester.mli     |  6 ++++++
 lib/frontend/dune             |  3 ++-
 9 files changed, 94 insertions(+), 6 deletions(-)
 create mode 100644 lib/frontend/Bibtex_import.ml

diff --git a/.gitignore b/.gitignore
index 55ca373..cf14725 100644
--- a/.gitignore
+++ b/.gitignore
@@ -136,4 +136,5 @@ setup.log
_opam/

# for Nix
result/
result
.direnv
diff --git a/bin/forester/main.ml b/bin/forester/main.ml
index 554f6b7..9a52832 100644
--- a/bin/forester/main.ml
@@ -37,7 +37,13 @@ let build ~env config_filename dev no_theme =
  let@ dir_to_copy = List.iter @~ dirs_to_copy in
  Forester.copy_contents_of_dir ~env @@ path_of_dir ~env dir_to_copy

let new_tree ~env config_filename dest_dir prefix template random =
(* Work out which kind of tree was requested from the two mutually exclusive
   command-line options. Exactly one of them must be supplied; giving both or
   neither is reported as a fatal [Argument_error]. *)
let bib_or_prefix prefix bibfile =
  match prefix, bibfile with
  | None, Some path -> `Bibfile path
  | Some namespace, None -> `Prefix namespace
  | None, None | Some _, Some _ ->
    Reporter.fatalf Argument_error "precisely one of --prefix or --bib is required"

let new_tree ~env config_filename dest_dir prefix template random bibfile =
  let tree_type = bib_or_prefix prefix bibfile in
  let@ () = Reporter.silence in
  let config = Forester_frontend.Config.parse_forest_config_file config_filename in
  let tree_dirs = paths_of_dirs ~env config.trees in
@@ -46,7 +52,9 @@ let new_tree ~env config_filename dest_dir prefix template random =
  Forester.plant_raw_forest_from_dirs ~env ~host: config.host ~dev: true ~tree_dirs ~asset_dirs ~foreign_paths;
  let mode = if random then `Random else `Sequential in
  let dest = path_of_dir ~env dest_dir in
  let addr = Forester.create_tree ~env ~dest ~prefix ~template ~mode in
  let addr = match tree_type with
      | `Prefix prefix -> Forester.create_tree ~env ~dest ~prefix ~template ~mode
      | `Bibfile bibfile -> Forester.create_bib_tree ~env ~dest ~bibfile in
  Format.printf "%s/%s.tree\n" dest_dir addr

let complete ~env config_filename title =
@@ -173,7 +181,7 @@ let build_cmd ~env =
let new_tree_cmd ~env =
  let arg_prefix =
    let doc = "The namespace prefix for the created tree." in
    Arg.required @@
    Arg.value @@
    Arg.opt (Arg.some Arg.string) None @@
    Arg.info ["prefix"] ~docv: "XXX" ~doc
  in
@@ -193,6 +201,12 @@ let new_tree_cmd ~env =
    let doc = "True if the new tree should have id assigned randomly rather than sequentially" in
    Arg.value @@ Arg.flag @@ Arg.info ["random"] ~doc
  in
  let arg_bibfile =
    let doc = "Create a reference tree based on this .bib file" in
    Arg.value @@
    Arg.opt (Arg.some Arg.string) None @@
    Arg.info ["bib"] ~docv: "BIBFILE" ~doc
  in
  let doc = "Create a new tree." in
  let info = Cmd.info "new" ~version ~doc in
  Cmd.v
@@ -204,6 +218,7 @@ let new_tree_cmd ~env =
      $ arg_prefix
      $ arg_template
      $ arg_random
      $ arg_bibfile
    )

let complete_cmd ~env =
diff --git a/dune-project b/dune-project
index 5bde731..0a67239 100644
--- a/dune-project
+++ b/dune-project
@@ -79,4 +79,6 @@
  (lsp
    (>= 0.19.0))
  (ppx_yojson_conv
    (>= 0.17.0))))
    (>= 0.17.0))
  (talaria-bibtex
    (>= 0.5))))
diff --git a/forester.opam b/forester.opam
index 11c9428..e03287f 100644
--- a/forester.opam
+++ b/forester.opam
@@ -34,6 +34,7 @@ depends: [
  "cid" {>= "0.1.0"}
  "lsp" {>= "0.19.0"}
  "ppx_yojson_conv" {>= "0.17.0"}
  "talaria-bibtex" {>= "0.5"}
  "odoc" {with-doc}
]
build: [
diff --git a/lib/core/Reporter.ml b/lib/core/Reporter.ml
index 2546d6e..a6b5953 100644
--- a/lib/core/Reporter.ml
+++ b/lib/core/Reporter.ml
@@ -28,6 +28,8 @@ module Message = struct
    | Broken_link
    | IO_error
    | Log
    | Invalid_bibtex_import
    | Argument_error
  [@@deriving show]

  let default_severity : t -> Asai.Diagnostic.severity = function
@@ -51,6 +53,8 @@ module Message = struct
    | Resource_not_found -> Error
    | Broken_link -> Warning
    | IO_error -> Error
    | Invalid_bibtex_import -> Error
    | Argument_error -> Error

  let short_code : t -> string =
    show
diff --git a/lib/frontend/Bibtex_import.ml b/lib/frontend/Bibtex_import.ml
new file mode 100644
index 0000000..b14f103
--- /dev/null
+++ b/lib/frontend/Bibtex_import.ml
@@ -0,0 +1,36 @@
(** [format_tree b source] renders the BibTeX record [b] as the text of a
    reference tree.

    The output contains, in order: a [\title] line if the record has a title, a
    [\date] line if it has a year, one [\author] line per author (written as
    [firstname-lastname], each with its first letter lowercased), a fixed
    [\taxon{reference}] line, a [\meta{doi}] line if the record has a DOI, and
    finally the trimmed original [source] embedded verbatim in [\meta{bibtex}].
    Missing fields are skipped. *)
let format_tree b source =
  let open Bibtex.Fields in
  let out = Buffer.create 1000 in
  (match b.%{ title.f } with
   | Some t -> Printf.bprintf out "\\title{%s}\n" t
   | None -> ());
  (match b.%{ year.f } with
   | Some y -> Printf.bprintf out "\\date{%d}\n" y
   | None -> ());
  (match b.%{ authors.f } with
   | Some people ->
     List.iter
       (fun person ->
          let first = String.uncapitalize_ascii person.firstname in
          let last = String.uncapitalize_ascii person.lastname in
          Printf.bprintf out "\\author{%s-%s}\n" first last)
       people
   | None -> ());
  Printf.bprintf out "\\taxon{reference}\n";
  (match b.%{ doi.f } with
   | Some d -> Printf.bprintf out "\\meta{doi}{%s}\n" (doi.conv.to_ d)
   | None -> ());
  (* Keep the raw entry so the original BibTeX stays available in the tree. *)
  Printf.bprintf out "\\meta{bibtex}{\\verb>>|\n%s\n>>}\n" (String.trim source);
  Buffer.contents out

(* Leading title words that carry no meaning and are skipped when picking the
   keyword used in a generated tree name. *)
let boring_words = ["the"; "a"; "an"; "on"]

(** [tree_name b] builds a tree name of the form [author-year-title] for the
    BibTeX record [b].

    - [author] is the last name of the first author with its first letter
      lowercased, or ["noname"] when there are no authors.
    - [year] is the record's year, or ["noyear"] when absent.
    - [title] is the first word of the title (split on single spaces, first
      letter lowercased) that is neither empty nor one of {!boring_words}. If
      every word is skipped, the whole title is used unchanged; if the record
      has no title, this part is empty. *)
let tree_name b =
  let open Bibtex.Fields in
  let author_part =
    match b.%{authors.f} with
    | Some (first :: _) -> String.uncapitalize_ascii first.lastname
    | Some [] | None -> "noname"
  in
  let title_part =
    match b.%{title.f} with
    | None -> ""
    | Some t ->
      (* Splitting on ' ' produces empty strings for leading or repeated
         spaces; those must not be chosen as the keyword. *)
      let is_keyword w =
        not (String.equal w "") && not (List.mem w boring_words)
      in
      let words =
        List.map String.uncapitalize_ascii (String.split_on_char ' ' t)
      in
      (match List.find_opt is_keyword words with
       | Some w -> w
       | None -> t)
  in
  let year_part =
    match b.%{year.f} with
    | Some y -> Printf.sprintf "%d" y
    | None -> "noyear"
  in
  Printf.sprintf "%s-%s-%s" author_part year_part title_part
diff --git a/lib/frontend/Forester.ml b/lib/frontend/Forester.ml
index 709667c..566e679 100644
--- a/lib/frontend/Forester.ml
+++ b/lib/frontend/Forester.ml
@@ -83,6 +83,28 @@ let create_tree ~env ~dest ~prefix ~template ~mode =
  EP.save ~create path @@ body ^ template_content;
  next

(** [expand_tilde path] replaces a leading ["~"] in [path] with the value of
    the [HOME] environment variable. Paths that do not start with ["~"]
    (including the empty path) are returned unchanged.

    Both ["~/dir"] and ["~dir"] expand to [$HOME/dir], and a bare ["~"] expands
    to [$HOME]. Note that ["~dir"] is {e not} treated as another user's home
    directory.

    Reports a fatal [IO_error] if the path starts with ["~"] and [HOME] is not
    set. *)
let expand_tilde path =
  let len = String.length path in
  if len = 0 || path.[0] <> '~' then path
  else
    match Sys.getenv_opt "HOME" with
    | None -> Reporter.fatalf IO_error "HOME environment variable not set"
    | Some home ->
      let rest = String.sub path 1 (len - 1) in
      (* support ~dir as well as ~/dir; a bare "~" leaves [rest] empty, which
         must not be indexed *)
      if String.length rest = 0 || rest.[0] = '/' then home ^ rest
      else home ^ "/" ^ rest

(** [create_bib_tree ~env ~dest ~bibfile] creates a reference tree in [dest]
    from the BibTeX file [bibfile] and returns the name of the new tree
    (without the [.tree] extension).

    [bibfile] may start with ["~"], which is expanded via [expand_tilde]. The
    file must contain exactly one entry; otherwise a fatal
    [Invalid_bibtex_import] is reported. The tree's name comes from
    [Bibtex_import.tree_name] and its content from [Bibtex_import.format_tree].
    The file is saved with [`Exclusive 0o644]. *)
let create_bib_tree ~env ~dest ~bibfile =
  let source = EP.load @@ EP.(Eio.Stdenv.fs env / expand_tilde bibfile) in
  let dtb = Bibtex.parse @@ Lexing.from_string source in
  (* Match on the entry list directly: this checks the "exactly one entry"
     requirement and extracts the record without a partial [List.hd]. *)
  match Bibtex.Database.to_list dtb with
  | [(_, bibrecord)] ->
    let treename = Bibtex_import.tree_name bibrecord in
    let fname = treename ^ ".tree" in
    let content = Bibtex_import.format_tree bibrecord source in
    let path = EP.(dest / fname) in
    let create = `Exclusive 0o644 in
    EP.save ~create path content;
    treename
  | [] | _ :: _ :: _ ->
    Reporter.fatalf Invalid_bibtex_import "expected bib file to contain exactly one entry"

let complete ~host prefix =
  let@ article = Seq.filter_map @~ List.to_seq @@ FU.get_all_articles () in
  let@ iri = Option.bind article.frontmatter.iri in
diff --git a/lib/frontend/Forester.mli b/lib/frontend/Forester.mli
index 9986269..b532f8d 100644
--- a/lib/frontend/Forester.mli
+++ b/lib/frontend/Forester.mli
@@ -44,6 +44,12 @@ val create_tree :
  mode: [`Sequential | `Random] ->
  string

(** [create_bib_tree ~env ~dest ~bibfile] writes a new tree into [dest] built
    from the single entry of the BibTeX file [bibfile], and returns the name of
    the created tree (without the [.tree] extension). The file must contain
    exactly one entry. *)
val create_bib_tree :
  env: env ->
  dest: dir ->
  bibfile: string ->
  string

val json_manifest :
  host: string ->
  home: string option ->
diff --git a/lib/frontend/dune b/lib/frontend/dune
index 477cb29..f9cfbdd 100644
--- a/lib/frontend/dune
+++ b/lib/frontend/dune
@@ -26,7 +26,8 @@
  algaeff
  str
  unix
  iri))
  iri
  talaria-bibtex))

(env
 (dev
--
2.47.0
Details
Message ID
<1735594997.d55k637ps4.astroid@red-special.none>
In-Reply-To
<874j2ls3i3.fsf@owenlynch.org> (view parent)
DKIM signature
pass
Download raw message
> I've written a basic bibtex importer. It's not perfect, but I think that
> it's fine if the generated trees take a small amount of manual editing
> before they compile correctly; this still saves a lot of typing!

The selection of bibtex that talaria-bibtex supports is arbitrary, 
small, and indicated by inscrutable errors so I suggest to not accept 
this patch.

I may just inline the source of talaria-bibtex into the forester source 
and then edit it heavily.

-Owen
Details
Message ID
<1735601920.krt220jhma.astroid@red-special.none>
In-Reply-To
<1735594997.d55k637ps4.astroid@red-special.none> (view parent)
DKIM signature
pass
Download raw message
> The selection of bibtex that talaria-bibtex supports is arbitrary, 
> small, and indicated by inscrutable errors so I suggest to not accept 
> this patch.
> 
> I may just inline the source of talaria-bibtex into the forester source 
> and then edit it heavily.
> 
> -Owen

I have gone an alternative route and written a python script which works 
quite well: https://github.com/olynch/bib2forester.

This python script also supports pulling directly from a doi, e.g.

  python3 bib2forester.py -D XXX/XXXX trees/bib

will pull the bibtex from doi.org for XXX/XXXX and then export a tree 
trees/bib using that bibtex. I hope that this script will be useful to 
other forester users.

-Owen
Details
Message ID
<4D1234D9-8C00-49EE-8993-116DC550665B@jonmsterling.com>
In-Reply-To
<1735601920.krt220jhma.astroid@red-special.none> (view parent)
Sender timestamp
1736507606
DKIM signature
pass
Download raw message
On 30 Dec 2024, at 23:44, Owen Lynch wrote:

>> The selection of bibtex that talaria-bibtex supports is arbitrary,
>> small, and indicated by inscrutable errors so I suggest to not accept
>> this patch.
>>
>> I may just inline the source of talaria-bibtex into the forester 
>> source
>> and then edit it heavily.
>>
>> -Owen
>
> I have gone an alternative route and written a python script which 
> works
> quite well: https://github.com/olynch/bib2forester.
>
> This python script also supports pulling directly from a doi, e.g.
>
>   python3 bib2forester.py -D XXX/XXXX trees/bib
>
> will pull the bibtex from doi.org for XXX/XXXX and then export a tree
> trees/bib using that bibtex. I hope that this script will be useful to
> other forester users.
>
> -Owen

Dear Owen,

Very sorry for the very long delays in getting back to you on this, and 
other things. I took a two-week rest, and now I’m embroiled in 
postgrad admissions and start-of-term stuff. But I am dropping in to say 
that I prefer your Python-based approach for now.

Best,
Jon
Reply to thread Export thread (mbox)