Hello all,
I've written a basic bibtex importer. It's not perfect, but I think that
it's fine if the generated trees take a small amount of manual editing
before they compile correctly; this still saves a lot of typing!
One notable limitation is that this only works with .bib files
containing precisely one entry. This is the format that the .bib files
generated by [pubs](https://github.com/pubs/pubs) have, so this is
extremely useful for me, but may be less useful for people who have .bib
files with many entries.
Any suggestions for how to design this better are more than welcome.
-Owen
---
.gitignore | 3 ++-
bin/forester/main.ml | 21 +++++++++++++++++---
dune-project | 4 +++-
forester.opam | 1 +
lib/core/Reporter.ml | 4 ++++
lib/frontend/Bibtex_import.ml | 36 +++++++++++++++++++++++++++++++++++
lib/frontend/Forester.ml | 22 +++++++++++++++++++++
lib/frontend/Forester.mli | 6 ++++++
lib/frontend/dune | 3 ++-
9 files changed, 94 insertions(+), 6 deletions(-)
create mode 100644 lib/frontend/Bibtex_import.ml
diff --git a/.gitignore b/.gitignore
index 55ca373..cf14725 100644
--- a/.gitignore
+++ b/.gitignore
@@ -136,4 +136,5 @@ setup.log
_opam/
# for Nix
-result/
+result
+.direnv
diff --git a/bin/forester/main.ml b/bin/forester/main.ml
index 554f6b7..9a52832 100644
--- a/bin/forester/main.ml
+++ b/bin/forester/main.ml
@@ -37,7 +37,13 @@ let build ~env config_filename dev no_theme =
let@ dir_to_copy = List.iter @~ dirs_to_copy in
Forester.copy_contents_of_dir ~env @@ path_of_dir ~env dir_to_copy
-let new_tree ~env config_filename dest_dir prefix template random =
+let bib_or_prefix prefix bibfile = match prefix, bibfile with (* exactly one of the two CLI options must be set *)
+  | None, Some path -> `Bibfile path
+  | Some ns, None -> `Prefix ns
+  | None, None | Some _, Some _ -> Reporter.fatalf Argument_error "precisely one of --prefix or --bib is required"
+
+let new_tree ~env config_filename dest_dir prefix template random bibfile =
+ let tree_type = bib_or_prefix prefix bibfile in
let@ () = Reporter.silence in
let config = Forester_frontend.Config.parse_forest_config_file config_filename in
let tree_dirs = paths_of_dirs ~env config.trees in
@@ -46,7 +52,9 @@ let new_tree ~env config_filename dest_dir prefix template random =
Forester.plant_raw_forest_from_dirs ~env ~host: config.host ~dev: true ~tree_dirs ~asset_dirs ~foreign_paths;
let mode = if random then `Random else `Sequential in
let dest = path_of_dir ~env dest_dir in
- let addr = Forester.create_tree ~env ~dest ~prefix ~template ~mode in
+ let addr = match tree_type with
+ | `Prefix prefix -> Forester.create_tree ~env ~dest ~prefix ~template ~mode
+ | `Bibfile bibfile -> Forester.create_bib_tree ~env ~dest ~bibfile in
Format.printf "%s/%s.tree\n" dest_dir addr
let complete ~env config_filename title =
@@ -173,7 +181,7 @@ let build_cmd ~env =
let new_tree_cmd ~env =
let arg_prefix =
let doc = "The namespace prefix for the created tree." in
- Arg.required @@
+ Arg.value @@
Arg.opt (Arg.some Arg.string) None @@
Arg.info ["prefix"] ~docv: "XXX" ~doc
in
@@ -193,6 +201,12 @@ let new_tree_cmd ~env =
let doc = "True if the new tree should have id assigned randomly rather than sequentially" in
Arg.value @@ Arg.flag @@ Arg.info ["random"] ~doc
in
+ let arg_bibfile =
+ let doc = "Create a reference tree based on this .bib file" in
+ Arg.value @@
+ Arg.opt (Arg.some Arg.string) None @@
+ Arg.info ["bib"] ~docv: "BIBFILE" ~doc
+ in
let doc = "Create a new tree." in
let info = Cmd.info "new" ~version ~doc in
Cmd.v
@@ -204,6 +218,7 @@ let new_tree_cmd ~env =
$ arg_prefix
$ arg_template
$ arg_random
+ $ arg_bibfile
)
let complete_cmd ~env =
diff --git a/dune-project b/dune-project
index 5bde731..0a67239 100644
--- a/dune-project
+++ b/dune-project
@@ -79,4 +79,6 @@
(lsp
(>= 0.19.0))
(ppx_yojson_conv
- (>= 0.17.0))))
+ (>= 0.17.0))
+ (talaria-bibtex
+ (>= 0.5))))
diff --git a/forester.opam b/forester.opam
index 11c9428..e03287f 100644
--- a/forester.opam
+++ b/forester.opam
@@ -34,6 +34,7 @@ depends: [
"cid" {>= "0.1.0"}
"lsp" {>= "0.19.0"}
"ppx_yojson_conv" {>= "0.17.0"}
+ "talaria-bibtex" {>= "0.5"}
"odoc" {with-doc}
]
build: [
diff --git a/lib/core/Reporter.ml b/lib/core/Reporter.ml
index 2546d6e..a6b5953 100644
--- a/lib/core/Reporter.ml
+++ b/lib/core/Reporter.ml
@@ -28,6 +28,8 @@ module Message = struct
| Broken_link
| IO_error
| Log
+ | Invalid_bibtex_import
+ | Argument_error
[@@deriving show]
let default_severity : t -> Asai.Diagnostic.severity = function
@@ -51,6 +53,8 @@ module Message = struct
| Resource_not_found -> Error
| Broken_link -> Warning
| IO_error -> Error
+ | Invalid_bibtex_import -> Error
+ | Argument_error -> Error
let short_code : t -> string =
show
diff --git a/lib/frontend/Bibtex_import.ml b/lib/frontend/Bibtex_import.ml
new file mode 100644
index 0000000..b14f103
--- /dev/null
+++ b/lib/frontend/Bibtex_import.ml
@@ -0,0 +1,36 @@
+let format_tree b source =
+  (* Render bib record [b] as forester tree markup; [source] is the raw entry text, embedded verbatim. *)
+  let open Bibtex.Fields in
+  let out = Buffer.create 1000 in
+  let emit fmt = Printf.bprintf out fmt in
+  b.%{ title.f } |> Option.iter (fun text -> emit "\\title{%s}\n" text);
+  b.%{ year.f } |> Option.iter (fun yr -> emit "\\date{%d}\n" yr);
+  b.%{ authors.f } |> Option.iter (List.iter (fun who ->
+    let first = String.uncapitalize_ascii who.firstname in
+    emit "\\author{%s-%s}\n" first (String.uncapitalize_ascii who.lastname)));
+  emit "\\taxon{reference}\n";
+  b.%{ doi.f } |> Option.iter (fun id -> emit "\\meta{doi}{%s}\n" (doi.conv.to_ id));
+  (* The trimmed source is kept in a verbatim block next to the derived fields. *)
+  emit "\\meta{bibtex}{\\verb>>|\n%s\n>>}\n" (String.trim source);
+  Buffer.contents out
+
+let boring_words = ["the"; "a"; "an"; "on"] (* title words that tree_name passes over when choosing the title component *)
+
+let tree_name b =
+  (* Build the "<author>-<year>-<word>" tree name for bib record [b], with placeholders for a missing author or year. *)
+  let open Bibtex.Fields in
+  let author = match b.%{authors.f} with
+    | Some (a :: _) -> String.uncapitalize_ascii a.lastname
+    | Some [] | None -> "noname" in
+  let title = match b.%{title.f} with
+    | None -> ""
+    | Some t ->
+      (* Skip empty strings too: leading or doubled spaces in the title make split_on_char yield "" items. *)
+      let is_good w = w <> "" && not (List.mem w boring_words) in
+      let words = String.split_on_char ' ' t |> List.map String.uncapitalize_ascii in
+      (* A title made only of boring words is used unchanged. *)
+      Option.value (List.find_opt is_good words) ~default: t in
+  let year = match b.%{year.f} with
+    | Some y -> string_of_int y
+    | None -> "noyear" in
+  Printf.sprintf "%s-%s-%s" author year title
diff --git a/lib/frontend/Forester.ml b/lib/frontend/Forester.ml
index 709667c..566e679 100644
--- a/lib/frontend/Forester.ml
+++ b/lib/frontend/Forester.ml
@@ -83,6 +83,28 @@ let create_tree ~env ~dest ~prefix ~template ~mode =
EP.save ~create path @@ body ^ template_content;
next
+let expand_tilde path =
+  (* Expand a leading "~" in [path] to $HOME (handles ~/dir and ~dir); other paths are returned unchanged. *)
+  if String.length path > 0 && path.[0] = '~' then
+    match Sys.getenv_opt "HOME" with
+    (* Test the second character only when it exists, so a bare "~" cannot raise Invalid_argument. *)
+    | Some home -> home ^ (if String.length path > 1 && path.[1] = '/' then "" else "/") ^ String.sub path 1 (String.length path - 1)
+    | None -> Reporter.fatalf IO_error "HOME environment variable not set"
+  else path
+
+let create_bib_tree ~env ~dest ~bibfile =
+  (* Import the single entry of [bibfile] as a new tree file under [dest] and return the tree's name. *)
+  let source = EP.load @@ EP.(Eio.Stdenv.fs env / expand_tilde bibfile) in
+  (* Match on the entry list so that zero or several entries are rejected without List.hd or a separate count check. *)
+  match Bibtex.parse (Lexing.from_string source) |> Bibtex.Database.to_list with
+  | [(_, bibrecord)] ->
+    let treename = Bibtex_import.tree_name bibrecord in
+    let fname = treename ^ ".tree" in
+    let content = Bibtex_import.format_tree bibrecord source in
+    EP.save ~create: (`Exclusive 0o644) EP.(dest / fname) content;
+    treename
+  | [] | _ :: _ :: _ -> Reporter.fatalf Invalid_bibtex_import "expected bib file to contain exactly one entry"
+
let complete ~host prefix =
let@ article = Seq.filter_map @~ List.to_seq @@ FU.get_all_articles () in
let@ iri = Option.bind article.frontmatter.iri in
diff --git a/lib/frontend/Forester.mli b/lib/frontend/Forester.mli
index 9986269..b532f8d 100644
--- a/lib/frontend/Forester.mli
+++ b/lib/frontend/Forester.mli
@@ -44,6 +44,12 @@ val create_tree :
mode: [`Sequential | `Random] ->
string
+val create_bib_tree :
+ env: env ->
+ dest: dir ->
+ bibfile: string ->
+ string
+
val json_manifest :
host: string ->
home: string option ->
diff --git a/lib/frontend/dune b/lib/frontend/dune
index 477cb29..f9cfbdd 100644
--- a/lib/frontend/dune
+++ b/lib/frontend/dune
@@ -26,7 +26,8 @@
algaeff
str
unix
- iri))
+ iri
+ talaria-bibtex))
(env
(dev
--
2.47.0
> I've written a basic bibtex importer. It's not perfect, but I think that
> it's fine if the generated trees take a small amount of manual editing
> before they compile correctly; this still saves a lot of typing!
The selection of bibtex that talaria-bibtex supports is arbitrary,
small, and indicated by inscrutable errors so I suggest to not accept
this patch.
I may just inline the source of talaria-bibtex into the forester source
and then edit it heavily.
-Owen
> The selection of bibtex that talaria-bibtex supports is arbitrary,
> small, and indicated by inscrutable errors so I suggest to not accept
> this patch.
>
> I may just inline the source of talaria-bibtex into the forester source
> and then edit it heavily.
>
> -Owen
I have gone an alternative route and written a python script which works
quite well: https://github.com/olynch/bib2forester.
This python script also supports pulling directly from a doi, e.g.
python3 bib2forester.py -D XXX/XXXX trees/bib
will pull the bibtex from doi.org for XXX/XXXX and then export a tree
trees/bib using that bibtex. I hope that this script will be useful to
other forester users.
-Owen
On 30 Dec 2024, at 23:44, Owen Lynch wrote:
>> The selection of bibtex that talaria-bibtex supports is arbitrary,
>> small, and indicated by inscrutable errors so I suggest to not accept
>> this patch.
>>
>> I may just inline the source of talaria-bibtex into the forester
>> source
>> and then edit it heavily.
>>
>> -Owen
>
> I have gone an alternative route and written a python script which
> works
> quite well: https://github.com/olynch/bib2forester.
>
> This python script also supports pulling directly from a doi, e.g.
>
> python3 bib2forester.py -D XXX/XXXX trees/bib
>
> will pull the bibtex from doi.org for XXX/XXXX and then export a tree
> trees/bib using that bibtex. I hope that this script will be useful to
> other forester users.
>
> -Owen
Dear Owen,
Very sorry for the very long delays in getting back to you on this, and
other things. I took a two week rest, and now I’m embroiled in
postgrad admissions and start-of-term stuff. But I am dropping in to say
that I prefer your Python-based approach for now.
Best,
Jon