~detegr/bqnlsp

bqnlsp: A bunch of things v1 PROPOSED

This contains a bunch of things:

- updating the BQN submodule;
- making file/state system values function better;
- replacing the UTF-32↔16 conversion logic, fixing some crashes;
- bytecode-based variable operations: goto-definition, find-references,
rename, highlight, list all symbols.

(I have no clue if I'm sourcehutting properly)

dzaima (7):
  Wrap each build step in build.bqn in a block for clarity
  Update & refactor BQN compiler invocation
  More proper compilation result checking
  Variable highlighting, rename, goto
  Implement go to references
  Implement symbol listing
  Optimized position conversion

 BQN                    |   2 +-
 build.bqn              |  34 +++--
 lsp/src/bqn.rs         | 298 ++++++++++++++++++++++++++++-------------
 lsp/src/bytecode.rs    | 236 ++++++++++++++++++++++++++++++++
 lsp/src/diagnostics.rs |  76 ++++-------
 lsp/src/highlight.rs   | 112 ++++++++++++++++
 lsp/src/main.rs        | 239 ++++++++++++++++++++++++++++++++-
 lsp/src/utils.rs       | 218 ++++++++++++++++++++++++++++++
 8 files changed, 1051 insertions(+), 164 deletions(-)
 create mode 100644 lsp/src/bytecode.rs
 create mode 100644 lsp/src/highlight.rs
 create mode 100644 lsp/src/utils.rs

-- 
2.45.2
Export patchset (mbox)
How do I use this?

Copy & paste the following snippet into your terminal to import this patchset into git:

curl -s https://lists.sr.ht/~detegr/bqnlsp/patches/56083/mbox | git am -3
Learn more about email & git

[PATCH bqnlsp 1/7] Wrap each build step in build.bqn in a block for clarity Export this patch

From: dzaima <dzaimagit@gmail.com>

---
 build.bqn | 34 ++++++++++++++++++++--------------
 1 file changed, 20 insertions(+), 14 deletions(-)

diff --git a/build.bqn b/build.bqn
index 2e3919e..d853ded 100755
--- a/build.bqn
@@ -10,19 +10,25 @@ env ← "env"‿"-S"‿("RUSTFLAGS=""-L"∾libcbqn∾""" LD_LIBRARY_PATH="""∾l

(Fail∘"BQN directory not found."‿"Try running `git submodule update --init --recursive` first.")⍟(¬•file.Exists) "BQN/src"

•Out "Building genhelp"
code‿out‿err ← •SH env∾"cargo"‿"build"‿"--release"‿"--bin"‿"genhelp"
(Fail∘⋈∘"Invalid path to libcbqn.so")⍟(+´"cannot find -lcbqn"⍷err) @
(Fail∘"Failed to build genhelp:"‿err)⍟{𝕩≠0} code

•Out "Generating help pages"
code‿out‿err ↩ •SH env∾"cargo"‿"run"‿"--bin"‿"genhelp"‿"./BQN"‿"./lsp/src/help"

(Fail∘"Failed to generate help pages:"‿err)⍟{𝕩≠0} code

•Out "Building bqnlsp"
code‿out‿err ↩ •SH env∾"cargo"‿"build"‿"--release"‿"--bin"‿"bqnlsp"
(Fail∘⋈∘"Invalid path to libcbqn.so")⍟(+´"cannot find -lcbqn"⍷err) @
(Fail∘"Failed to build bqnlsp:"‿err)⍟{𝕩≠0} code
# Step 1: build the `genhelp` binary with cargo (release profile).
# A linker error mentioning `-lcbqn` is reported as a bad libcbqn path;
# any other non-zero exit code aborts the build with cargo's stderr.
{
  •Out "Building genhelp"
  code‿out‿err ← •SH env∾"cargo"‿"build"‿"--release"‿"--bin"‿"genhelp"
  (Fail∘⋈∘"Invalid path to libcbqn.so")⍟(∨´"cannot find -lcbqn"⍷err) @
  (Fail∘"Failed to build genhelp:"‿err)⍟{𝕩≠0} code
}

# Step 2: run `genhelp` with ./BQN and ./lsp/src/help as its arguments.
# A non-zero exit code aborts the build with the captured stderr.
{
  •Out "Generating help pages"
  code‿out‿err ← •SH env∾"cargo"‿"run"‿"--bin"‿"genhelp"‿"./BQN"‿"./lsp/src/help"

  (Fail∘"Failed to generate help pages:"‿err)⍟{𝕩≠0} code
}

# Step 3: build the `bqnlsp` binary itself (release profile).
# Same failure handling as the genhelp build: a missing `-lcbqn` is reported
# as a bad libcbqn path, any other non-zero exit code prints cargo's stderr.
{
  •Out "Building bqnlsp"
  code‿out‿err ← •SH env∾"cargo"‿"build"‿"--release"‿"--bin"‿"bqnlsp"
  (Fail∘⋈∘"Invalid path to libcbqn.so")⍟(∨´"cannot find -lcbqn"⍷err) @
  (Fail∘"Failed to build bqnlsp:"‿err)⍟{𝕩≠0} code
}

•Out "Built: target/release/bqnlsp"
-- 
2.45.2

[PATCH bqnlsp 2/7] Update & refactor BQN compiler invocation Export this patch

From: dzaima <dzaimagit@gmail.com>

---
 BQN                    |  2 +-
 lsp/src/bqn.rs         | 31 ++++++++++++++++++++-----------
 lsp/src/diagnostics.rs |  2 +-
 3 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/BQN b/BQN
index 5dbe5b5..c40d0af 160000
--- a/BQN
+++ b/BQN
@@ -1 +1 @@
Subproject commit 5dbe5b502b760b5d87c043b5ab836751b2c3e5e8
Subproject commit c40d0afc1344883fc17a2255cd393d130ad0f291
diff --git a/lsp/src/bqn.rs b/lsp/src/bqn.rs
index 9ab4c57..511c25f 100644
--- a/lsp/src/bqn.rs
+++ b/lsp/src/bqn.rs
@@ -31,17 +31,26 @@ pub fn compile(code: &str) -> Result<BQNResult, cbqn::Error> {

    let (compiler, prims) = COMPILER.get_or_init(|| {
        let glyphs = eval(GLYPHS_SRC).unwrap();
        let glyph_strs = glyphs
            .to_bqnvalue_vec()
            .unwrap()
            .into_iter()
            .map(|v| format!(r#""{}""#, v.to_string().unwrap()))
            .collect::<Vec<String>>()
            .join("‿");
        let compiler_src = COMPILER_SRC.replace("•args", &glyph_strs);
        let compiler = eval(&compiler_src).unwrap();
        let compiler = BQN!("{𝕏⎊{𝕊: •CurrentError@}}", compiler).unwrap();
        let prims_system = BQN!(r#"{(∾•BQN∘⋈¨¨𝕩)‿(""‿"."‿""⊸•BQN¨'•'⊸∾¨)}"#, glyphs).unwrap();

        let compiler = BQN!(
            glyphs.clone(),
            r#"{
            compiler ← ⟨"", "", 𝕨⟩ •BQN 𝕩
            Compiler⎊{𝕊: •CurrentError@}
        }"#,
            COMPILER_SRC
        )
        .unwrap();

        // check •listsys to not invoke potentially-context-dependent system values like •state/•name/•path/•args
        let prims_system = BQN!(
            r#"{⟨
            •BQN∘⋈¨ ∾𝕩,
            {(""‿"."‿""⊸•BQN '•'⊸∾)¨⌾((¬𝕩∊•listsys)⊸/) 𝕩}
        ⟩}"#,
            glyphs
        )
        .unwrap();

        (compiler, prims_system)
    });
diff --git a/lsp/src/diagnostics.rs b/lsp/src/diagnostics.rs
index 48d7d5b..5d0c486 100644
--- a/lsp/src/diagnostics.rs
+++ b/lsp/src/diagnostics.rs
@@ -45,7 +45,7 @@ pub fn get_diagnostics(text: &str) -> Vec<Diagnostic> {
                    None,
                    error.clone(),
                    None,
                    None
                    None,
                )
            })
            .collect::<Vec<_>>(),
-- 
2.45.2

[PATCH bqnlsp 3/7] More proper compilation result checking Export this patch

From: dzaima <dzaimagit@gmail.com>

---
 lsp/src/bqn.rs | 154 +++++++++++++++++++++++++------------------------
 1 file changed, 79 insertions(+), 75 deletions(-)

diff --git a/lsp/src/bqn.rs b/lsp/src/bqn.rs
index 511c25f..afdb320 100644
--- a/lsp/src/bqn.rs
+++ b/lsp/src/bqn.rs
@@ -36,7 +36,7 @@ pub fn compile(code: &str) -> Result<BQNResult, cbqn::Error> {
            glyphs.clone(),
            r#"{
            compiler ← ⟨"", "", 𝕨⟩ •BQN 𝕩
            Compiler⎊{𝕊: •CurrentError@}
            (1 ⋈ Compiler)⎊{𝕊: 0 ⋈ •CurrentError@}
        }"#,
            COMPILER_SRC
        )
@@ -54,84 +54,88 @@ pub fn compile(code: &str) -> Result<BQNResult, cbqn::Error> {

        (compiler, prims_system)
    });
    let out = compiler.call2(prims, &BQNValue::from(code))?;
    let res = out.to_bqnvalue_vec()?;

    let invalid_program = res.len() == 2 || res[0].bqn_type() == BQNType::Character;
    if invalid_program {
        let span = match res[0].bqn_type() {
            BQNType::Number => {
                let v = res[0].to_f64()?;
                vec![v, v]
            }
            BQNType::Character => {
                let error = out.to_string()?;
                let words = error.split(' ').collect::<Vec<_>>();
                let mut span = vec![0, 0];
                if let Some(w) = words.iter().next_back() {
                    if w.starts_with("•") {
                        if let Some(new_span) = find_span(w, code) {
                            span = new_span;
    let out = compiler
        .call2(prims, &BQNValue::from(code))?
        .to_bqnvalue_vec()?;
    let res_val = &out[1];
    let res = res_val.to_bqnvalue_vec()?;
    match out[0].to_f64()? {
        1.0 => {
            let bytecode = res[0].to_f64_vec()?;
            let constants = res[1].to_bqnvalue_vec()?;

            let blocks = res[2]
                .to_bqnvalue_vec()?
                .into_iter()
                .map(|block| block.to_bqnvalue_vec().unwrap())
                .collect::<Vec<_>>();

            let bodies = res[3]
                .to_bqnvalue_vec()?
                .into_iter()
                .map(|v| {
                    let b = v.to_bqnvalue_vec().unwrap();
                    (
                        b[0].to_f64().unwrap(),
                        b[1].to_f64().unwrap(),
                        b[2].to_f64_vec().unwrap(),
                        b[3].to_f64_vec().unwrap(),
                    )
                })
                .collect::<Vec<_>>();

            let locs = res[4]
                .to_bqnvalue_vec()?
                .into_iter()
                .map(|v| v.to_f64_vec().unwrap())
                .collect::<Vec<_>>();

            let tokens = res[5].to_bqnvalue_vec()?;
            let tokens = (
                tokens[0].to_f64_vec()?,
                tokens[1].to_f64_vec()?,
                tokens[2].to_bqnvalue_vec()?,
                tokens[3].to_f64_vec()?,
                tokens[4].to_f64_vec()?,
            );

            Ok(BQNResult::Compiled(CompilerResult {
                bytecode,
                constants,
                blocks,
                bodies,
                locs,
                tokens,
            }))
        }
        _ => {
            let span = match res[0].bqn_type() {
                BQNType::Number => {
                    let v = res[0].to_f64()?;
                    vec![v, v]
                }
                BQNType::Character => {
                    let error = res_val.to_string()?;
                    let words = error.split(' ').collect::<Vec<_>>();
                    let mut span = vec![0, 0];
                    if let Some(w) = words.iter().next_back() {
                        if w.starts_with("•") {
                            if let Some(new_span) = find_span(w, code) {
                                span = new_span;
                            }
                        }
                    }
                    return Ok(BQNResult::Error { span, error });
                }
                return Ok(BQNResult::Error { span, error });
            }
            _ => res[0].to_f64_vec()?,
        };

        let error = res[1].to_string()?;
        Ok(BQNResult::Error {
            span: span.into_iter().map(|v| v as u32).collect(),
            error,
        })
    } else {
        let bytecode = res[0].to_f64_vec()?;
        let constants = res[1].to_bqnvalue_vec()?;

        let blocks = res[2]
            .to_bqnvalue_vec()?
            .into_iter()
            .map(|block| block.to_bqnvalue_vec().unwrap())
            .collect::<Vec<_>>();

        let bodies = res[3]
            .to_bqnvalue_vec()?
            .into_iter()
            .map(|v| {
                let b = v.to_bqnvalue_vec().unwrap();
                (
                    b[0].to_f64().unwrap(),
                    b[1].to_f64().unwrap(),
                    b[2].to_f64_vec().unwrap(),
                    b[3].to_f64_vec().unwrap(),
                )
                _ => res[0].to_f64_vec()?,
            };

            let error = res[1].to_string()?;
            Ok(BQNResult::Error {
                span: span.into_iter().map(|v| v as u32).collect(),
                error,
            })
            .collect::<Vec<_>>();

        let locs = res[4]
            .to_bqnvalue_vec()?
            .into_iter()
            .map(|v| v.to_f64_vec().unwrap())
            .collect::<Vec<_>>();

        let tokens = res[5].to_bqnvalue_vec()?;
        let tokens = (
            tokens[0].to_f64_vec()?,
            tokens[1].to_f64_vec()?,
            tokens[2].to_bqnvalue_vec()?,
            tokens[3].to_f64_vec()?,
            tokens[4].to_f64_vec()?,
        );

        Ok(BQNResult::Compiled(CompilerResult {
            bytecode,
            constants,
            blocks,
            bodies,
            locs,
            tokens,
        }))
        }
    }
}

-- 
2.45.2

[PATCH bqnlsp 4/7] Variable highlighting, rename, goto Export this patch

From: dzaima <dzaimagit@gmail.com>

---
 lsp/src/bqn.rs         | 153 +++++++++++++++++++++-----
 lsp/src/bytecode.rs    | 236 +++++++++++++++++++++++++++++++++++++++++
 lsp/src/diagnostics.rs |  40 ++-----
 lsp/src/highlight.rs   | 108 +++++++++++++++++++
 lsp/src/main.rs        | 154 +++++++++++++++++++++++++--
 lsp/src/utils.rs       | 192 +++++++++++++++++++++++++++++++++
 6 files changed, 817 insertions(+), 66 deletions(-)
 create mode 100644 lsp/src/bytecode.rs
 create mode 100644 lsp/src/highlight.rs
 create mode 100644 lsp/src/utils.rs

diff --git a/lsp/src/bqn.rs b/lsp/src/bqn.rs
index afdb320..c42f12e 100644
--- a/lsp/src/bqn.rs
+++ b/lsp/src/bqn.rs
@@ -1,11 +1,18 @@
use std::{cmp::max, collections::HashSet};

use cbqn::{eval, BQNType, BQNValue, BQN};
use once_cell::sync::OnceCell;
use regex::Regex;

#[derive(Debug)]
pub enum BQNResult {
    Error { span: Vec<u32>, error: String },
    Error {
        code: String,
        span: Vec<u32>,
        error: String,
    },
    Compiled(CompilerResult),
    InternalError(cbqn::Error),
    EmptyProgram,
}

@@ -13,18 +20,54 @@ static GLYPHS_SRC: &str = include_str!(concat!(env!("BQN_PATH"), "src/glyphs.bqn
static COMPILER_SRC: &str = include_str!(concat!(env!("BQN_PATH"), "src/c.bqn"));
static COMPILER: OnceCell<(BQNValue, BQNValue)> = OnceCell::new();

/// One body of the compiled program, unpacked from the compiler's body list.
#[allow(unused)]
#[derive(Debug)]
pub struct Body {
    /// Index into the program bytecode where this body's instructions start.
    pub bytecode_start: usize,
    /// End of this body's instructions. The decoder derives it from the start
    /// of the following body (or the bytecode length for the last body).
    pub bytecode_end: usize, // exclusive range
    /// Number of variable slots in this body; this can exceed
    /// `variable_ids.len()` (the difference is treated as special unnamed
    /// variables by the highlighting code).
    pub variable_count: usize,
    /// Raw per-variable ids exactly as returned by the compiler.
    // NOTE(review): presumably indices into the identifier/token table — confirm.
    pub variable_ids: Vec<f64>,
    /// Raw export mask exactly as returned by the compiler.
    // NOTE(review): presumably one 0/1 flag per named variable — confirm.
    pub export_mask: Vec<f64>,
}

/// One block of the compiled program, unpacked from the compiler's block list.
#[allow(unused)]
#[derive(Debug)]
pub struct Block {
    /// Numeric block type as reported by the compiler.
    pub r#type: u64,
    /// Whether the compiler flagged the block as immediate.
    pub immediate: bool,
    /// Body indices grouped by case list. A block described by a single body
    /// index is stored as one list containing that one index.
    pub cases: Vec<Vec<usize>>,
}
impl Block {
    /// Returns every distinct body index referenced by any case of this block.
    ///
    /// The result is sorted ascending. Deduplicating through a `HashSet` alone
    /// would expose the set's arbitrary iteration order, which made the order
    /// of child bodies (and everything derived from it, such as the order of
    /// reported highlights) vary from run to run.
    pub(crate) fn all_bodies(&self) -> Vec<usize> {
        let unique: HashSet<usize> = self.cases.iter().flatten().copied().collect();
        let mut bodies: Vec<usize> = unique.into_iter().collect();
        bodies.sort_unstable();
        bodies
    }
}

#[allow(unused)]
#[derive(Debug)]
pub struct CompilerResult {
    bytecode: Vec<f64>,
    constants: Vec<BQNValue>,
    blocks: Vec<Vec<BQNValue>>,
    bodies: Vec<(f64, f64, Vec<f64>, Vec<f64>)>,
    locs: Vec<Vec<f64>>,
    tokens: (Vec<f64>, Vec<f64>, Vec<BQNValue>, Vec<f64>, Vec<f64>),
    pub code: String,
    pub bytecode: Vec<u64>,
    pub constants: Vec<BQNValue>,
    pub blocks: Vec<Block>,
    pub bodies: Vec<Body>,
    pub locs: Vec<(usize, usize)>, // exclusive ranges
    pub tokens: (Vec<f64>, Vec<f64>, Vec<BQNValue>, Vec<f64>, Vec<f64>),
}

pub fn compile(code: &str) -> Result<BQNResult, cbqn::Error> {
/// Compiles `code` and always yields a [`BQNResult`].
///
/// Any `cbqn::Error` raised while invoking the compiler or unpacking its
/// output is folded into [`BQNResult::InternalError`] instead of being
/// propagated to the caller.
pub fn compile(code: &str) -> BQNResult {
    compile_impl(code).unwrap_or_else(BQNResult::InternalError)
}
fn compile_impl(code: &str) -> Result<BQNResult, cbqn::Error> {
    if code.is_empty() || code.chars().all(char::is_whitespace) {
        return Ok(BQNResult::EmptyProgram);
    }
@@ -35,9 +78,9 @@ pub fn compile(code: &str) -> Result<BQNResult, cbqn::Error> {
        let compiler = BQN!(
            glyphs.clone(),
            r#"{
            compiler ← ⟨"", "", 𝕨⟩ •BQN 𝕩
            (1 ⋈ Compiler)⎊{𝕊: 0 ⋈ •CurrentError@}
        }"#,
                compiler ← ⟨"", "", 𝕨⟩ •BQN 𝕩
                (1 ⋈ Compiler)⎊{𝕊: 0 ⋈ •CurrentError@}
            }"#,
            COMPILER_SRC
        )
        .unwrap();
@@ -45,9 +88,9 @@ pub fn compile(code: &str) -> Result<BQNResult, cbqn::Error> {
        // check •listsys to not invoke potentially-context-dependent system values like •state/•name/•path/•args
        let prims_system = BQN!(
            r#"{⟨
            •BQN∘⋈¨ ∾𝕩,
            {(""‿"."‿""⊸•BQN '•'⊸∾)¨⌾((¬𝕩∊•listsys)⊸/) 𝕩}
        ⟩}"#,
                •BQN∘⋈¨ ∾𝕩,
                {(""‿"."‿""⊸•BQN '•'⊸∾)¨⌾((¬𝕩∊•listsys)⊸/) 𝕩}
            ⟩}"#,
            glyphs
        )
        .unwrap();
@@ -61,33 +104,83 @@ pub fn compile(code: &str) -> Result<BQNResult, cbqn::Error> {
    let res = res_val.to_bqnvalue_vec()?;
    match out[0].to_f64()? {
        1.0 => {
            let bytecode = res[0].to_f64_vec()?;
            let bytecode = res[0]
                .to_f64_vec()?
                .into_iter()
                .map(|x| x as u64)
                .collect::<Vec<_>>();
            let constants = res[1].to_bqnvalue_vec()?;

            let blocks = res[2]
            let blocks: Vec<Block> = res[2]
                .to_bqnvalue_vec()?
                .into_iter()
                .map(|block| block.to_bqnvalue_vec().unwrap())
                .map(|block| {
                    let parts = block.to_bqnvalue_vec().unwrap();
                    let inds = &parts[2];
                    let cases = match inds.bqn_type() {
                        BQNType::Array => inds
                            .to_bqnvalue_vec()
                            .unwrap()
                            .into_iter()
                            .map(|a: BQNValue| {
                                a.to_f64_vec()
                                    .unwrap()
                                    .into_iter()
                                    .map(|x| x as usize)
                                    .collect::<Vec<_>>()
                            })
                            .collect(),
                        BQNType::Number => {
                            let ind = inds.to_f64().unwrap() as usize;
                            vec![vec![ind]]
                        }
                        _ => panic!("bad block case"),
                    };
                    Block {
                        r#type: parts[0].to_f64().unwrap() as u64,
                        immediate: parts[1].to_f64().unwrap() == 1.0,
                        cases,
                    }
                })
                .collect::<Vec<_>>();

            let mut prev_bytecode_start = bytecode.len();
            let bodies = res[3]
                .to_bqnvalue_vec()?
                .into_iter()
                .rev() // so that prev_bytecode_start can be tracked
                .map(|v| {
                    let b = v.to_bqnvalue_vec().unwrap();
                    (
                        b[0].to_f64().unwrap(),
                        b[1].to_f64().unwrap(),
                        b[2].to_f64_vec().unwrap(),
                        b[3].to_f64_vec().unwrap(),
                    )
                    let bytecode_start = b[0].to_f64().unwrap() as usize;
                    let bytecode_end = prev_bytecode_start;
                    debug_assert!(bytecode_start <= bytecode_end);
                    prev_bytecode_start = bytecode_start;
                    Body {
                        bytecode_start,
                        bytecode_end: max(bytecode_start, bytecode_end),
                        variable_count: b[1].to_f64().unwrap() as usize,
                        variable_ids: b[2].to_f64_vec().unwrap(),
                        export_mask: b[3].to_f64_vec().unwrap(),
                    }
                })
                .collect::<Vec<_>>()
                .into_iter() // reverse back, ensuring that body 0 is the outermost one
                .rev()
                .collect::<Vec<_>>();

            let locs = res[4]
                .to_bqnvalue_vec()?
            let mut loc_iter = res[4].to_bqnvalue_vec()?.into_iter().map(|v| {
                v.to_f64_vec()
                    .unwrap()
                    .into_iter()
                    .map(|x| x as usize)
                    .collect::<Vec<_>>()
            });
            let loc_s = loc_iter.next().unwrap();
            let loc_e = loc_iter.next().unwrap();
            let locs = loc_s
                .into_iter()
                .map(|v| v.to_f64_vec().unwrap())
                .zip(loc_e)
                .map(|(a, b)| (a, b + 1))
                .collect::<Vec<_>>();

            let tokens = res[5].to_bqnvalue_vec()?;
@@ -100,6 +193,7 @@ pub fn compile(code: &str) -> Result<BQNResult, cbqn::Error> {
            );

            Ok(BQNResult::Compiled(CompilerResult {
                code: code.to_owned(),
                bytecode,
                constants,
                blocks,
@@ -125,13 +219,18 @@ pub fn compile(code: &str) -> Result<BQNResult, cbqn::Error> {
                            }
                        }
                    }
                    return Ok(BQNResult::Error { span, error });
                    return Ok(BQNResult::Error {
                        code: code.to_owned(),
                        span,
                        error,
                    });
                }
                _ => res[0].to_f64_vec()?,
            };

            let error = res[1].to_string()?;
            Ok(BQNResult::Error {
                code: code.to_owned(),
                span: span.into_iter().map(|v| v as u32).collect(),
                error,
            })
diff --git a/lsp/src/bytecode.rs b/lsp/src/bytecode.rs
new file mode 100644
index 0000000..1b50cab
--- /dev/null
+++ b/lsp/src/bytecode.rs
@@ -0,0 +1,236 @@
use once_cell::sync::OnceCell;
use std::collections::HashMap;

use crate::bqn::{Body, CompilerResult};

/// A single decoded instruction, borrowing its operands from the bytecode.
#[derive(Debug)]
pub struct BytecodeInstr<'a> {
    /// Position of the opcode word within the program bytecode.
    pub pos: usize,
    /// Numeric opcode (one of the [`Opcode`] constants).
    pub opcode: u64,
    /// Operand words that follow the opcode.
    pub args: &'a [u64],
}

/// A variable reference resolved relative to the body that contains it.
#[derive(Debug)]
pub struct VarInfo {
    /// Slot index of the variable within its defining body.
    pub index: usize,
    /// Depth of the defining body, counted from the outermost body.
    pub full_depth: usize,
    /// How many bodies up from the referencing body the variable lives.
    pub local_depth: usize,
}

impl<'a> BytecodeInstr<'a> {
    /// True for any instruction that refers to a variable slot.
    pub fn is_var_any(&self) -> bool {
        matches!(self.opcode, Opcode::VARO | Opcode::VARU | Opcode::VARM)
    }

    /// True for the variable-read opcodes (`VARO`, `VARU`).
    pub fn is_var_read(&self) -> bool {
        matches!(self.opcode, Opcode::VARO | Opcode::VARU)
    }

    /// True for the mutable variable reference opcode (`VARM`).
    pub fn is_var_mut(&self) -> bool {
        matches!(self.opcode, Opcode::VARM)
    }

    /// For a variable instruction, returns its `(depth, index)` operands.
    pub fn into_var(&self) -> Option<(usize, usize)> {
        if !self.is_var_any() {
            return None;
        }
        let depth = self.args[0] as usize;
        let index = self.args[1] as usize;
        Some((depth, index))
    }

    /// Like [`Self::into_var`], but also computes the absolute depth of the
    /// defining body from the depth of the referencing `body`.
    pub fn into_body_var(&self, body: &BodyInfo) -> Option<VarInfo> {
        let (local_depth, index) = self.into_var()?;
        Some(VarInfo {
            index,
            full_depth: body.depth - local_depth,
            local_depth,
        })
    }

    /// For a `DFND` instruction, returns the index of the referenced block.
    pub fn into_block_idx(&self) -> Option<usize> {
        match self.opcode {
            Opcode::DFND => Some(self.args[0] as usize),
            _ => None,
        }
    }
}

// Lazily-initialised table mapping each opcode to its number of operand words;
// populated on first use by `opcode_argn`.
static OPCODE_ARGN: OnceCell<HashMap<u64, usize>> = OnceCell::new();

/// Namespace for the numeric opcode values understood by this module.
///
/// NOTE(review): the values presumably mirror the opcode numbering of the BQN
/// VM bytecode format — confirm against the upstream spec when updating.
pub struct Opcode;
impl Opcode {
    pub const PUSH: u64 = 0x00;
    // DFND's single operand is used as a block index (see `into_block_idx`).
    pub const DFND: u64 = 0x01;
    pub const SYSV: u64 = 0x02;

    pub const POPS: u64 = 0x06;
    pub const RETN: u64 = 0x07;
    pub const RETD: u64 = 0x08;
    pub const LSTO: u64 = 0x0B;
    pub const LSTM: u64 = 0x0C;
    pub const ARMO: u64 = 0x0D;
    pub const ARMM: u64 = 0x0E;

    pub const FN1C: u64 = 0x10;
    pub const FN2C: u64 = 0x11;
    pub const FN1O: u64 = 0x12;
    pub const FN2O: u64 = 0x13;
    pub const TR2D: u64 = 0x14;
    pub const TR3D: u64 = 0x15;
    pub const CHKV: u64 = 0x16;
    pub const TR3O: u64 = 0x17;

    pub const MD1C: u64 = 0x1A;
    pub const MD2C: u64 = 0x1B;

    // Variable access: VARO and VARU are treated as reads, VARM as a mutable
    // reference; each carries (depth, index) operands (see `into_var`).
    pub const VARO: u64 = 0x20;
    pub const VARM: u64 = 0x21;
    pub const VARU: u64 = 0x22;
    pub const DYNO: u64 = 0x26;
    pub const DYNM: u64 = 0x27;

    pub const PRED: u64 = 0x2A;
    pub const VFYM: u64 = 0x2B;
    pub const NOTM: u64 = 0x2C;
    pub const SETH: u64 = 0x2F;
    pub const SETN: u64 = 0x30;
    pub const SETU: u64 = 0x31;
    pub const SETM: u64 = 0x32;
    pub const SETC: u64 = 0x33;
    pub const FLDO: u64 = 0x40;
    pub const FLDM: u64 = 0x41;
    pub const ALIM: u64 = 0x42;
}

/// Returns the shared opcode → operand-count table, building it on first use.
///
/// Opcodes missing from the table are unknown to the decoder.
fn opcode_argn() -> &'static HashMap<u64, usize> {
    // Opcodes grouped by how many operand words follow them.
    const NO_OPERANDS: &[u64] = &[
        Opcode::POPS,
        Opcode::RETN,
        Opcode::RETD,
        Opcode::FN1C,
        Opcode::FN2C,
        Opcode::FN1O,
        Opcode::FN2O,
        Opcode::TR2D,
        Opcode::TR3D,
        Opcode::CHKV,
        Opcode::TR3O,
        Opcode::MD1C,
        Opcode::MD2C,
        Opcode::PRED,
        Opcode::VFYM,
        Opcode::NOTM,
        Opcode::SETH,
        Opcode::SETN,
        Opcode::SETU,
        Opcode::SETM,
        Opcode::SETC,
    ];
    const ONE_OPERAND: &[u64] = &[
        Opcode::PUSH,
        Opcode::DFND,
        Opcode::SYSV,
        Opcode::LSTO,
        Opcode::LSTM,
        Opcode::ARMO,
        Opcode::ARMM,
        Opcode::DYNO,
        Opcode::DYNM,
        Opcode::FLDO,
        Opcode::FLDM,
        Opcode::ALIM,
    ];
    const TWO_OPERANDS: &[u64] = &[Opcode::VARO, Opcode::VARM, Opcode::VARU];

    OPCODE_ARGN.get_or_init(|| {
        let groups: [(&[u64], usize); 3] =
            [(NO_OPERANDS, 0), (ONE_OPERAND, 1), (TWO_OPERANDS, 2)];
        let mut table: HashMap<u64, usize> = HashMap::new();
        for &(opcodes, argn) in groups.iter() {
            for &opcode in opcodes {
                table.insert(opcode, argn);
            }
        }
        table
    })
}

/// Decodes a flat bytecode slice into a list of instructions.
///
/// `pos_start` is added to every instruction's position, so a sub-slice of a
/// larger program can be decoded while keeping program-relative positions.
///
/// Returns `None` when an unknown opcode is encountered or when the final
/// instruction's operands run past the end of `bytecode`. An empty slice
/// decodes to an empty list.
pub fn parse_bytecode(pos_start: usize, bytecode: &[u64]) -> Option<Vec<BytecodeInstr<'_>>> {
    let operand_counts = opcode_argn();
    let mut instrs: Vec<BytecodeInstr> = Vec::new();

    let mut i: usize = 0;
    while i < bytecode.len() {
        let opcode = bytecode[i];
        let end = i + 1 + *operand_counts.get(&opcode)?;
        // Checked slicing: truncated operands yield `None` rather than a
        // panic. (The previous guard compared `i`, not the computed end,
        // against the length and therefore could never trigger.)
        let args = bytecode.get(i + 1..end)?;
        instrs.push(BytecodeInstr {
            pos: pos_start + i,
            opcode,
            args,
        });
        i = end;
    }

    Some(instrs)
}

/// A compiled body together with its decoded instructions and its place in
/// the body nesting tree, as computed by `body_info`.
#[derive(Debug)]
pub struct BodyInfo<'a> {
    /// Index of this body within the compiler result's body list.
    pub idx: usize,
    /// Nesting depth; the root body (index 0) has depth 0 and each child is
    /// one deeper than its parent. `usize::MAX` marks a body that has not
    /// been reached from the root.
    pub depth: usize,
    /// The underlying compiled body.
    pub body: &'a Body,
    /// The decoded instructions attributed to this body.
    pub instrs: &'a [BytecodeInstr<'a>],
    /// Index of the enclosing body, or `None` for the root.
    pub parent: Option<usize>,
    /// Indices of the bodies of all blocks defined by this body's instructions.
    pub children: Vec<usize>,
}

/// Splits the decoded `bytecode` between the bodies of `compiled` and links
/// the bodies into a tree.
///
/// Instructions are handed out in body order: each body receives the run of
/// not-yet-assigned instructions whose position lies before its
/// `bytecode_end`. Starting from body 0 (depth 0), every block definition
/// found in a body makes all bodies of that block its children, one level
/// deeper.
///
/// # Panics
///
/// Panics if there is no body 0, if a block or body index is out of range,
/// or if a body is reached more than once while linking.
pub fn body_info<'a>(
    bytecode: &'a Vec<BytecodeInstr>,
    compiled: &'a CompilerResult,
) -> Vec<BodyInfo<'a>> {
    // Records parent/depth for body `idx`, collects its children, and recurses.
    fn link(
        idx: usize,
        infos: &mut Vec<BodyInfo<'_>>,
        parent: Option<usize>,
        depth: usize,
        compiled: &CompilerResult,
    ) {
        let info = &mut infos[idx];
        assert!(info.depth == usize::MAX);
        info.parent = parent;
        info.depth = depth;

        let instrs = info.instrs;
        let nested_bodies = instrs
            .iter()
            .filter_map(|instr| instr.into_block_idx())
            .flat_map(|block_idx| compiled.blocks[block_idx].all_bodies());
        info.children.extend(nested_bodies);

        // Cloned so that `infos` can be mutably re-borrowed for the recursion.
        let children = info.children.clone();
        for child in children {
            link(child, infos, Some(idx), depth + 1, compiled);
        }
    }

    let mut infos: Vec<BodyInfo<'a>> = Vec::with_capacity(compiled.bodies.len());
    let mut next_instr = 0;
    for (idx, body) in compiled.bodies.iter().enumerate() {
        let first_instr = next_instr;
        next_instr += bytecode[first_instr..]
            .iter()
            .take_while(|instr| instr.pos < body.bytecode_end)
            .count();

        infos.push(BodyInfo {
            idx,
            depth: usize::MAX, // placeholder until `link` visits this body
            body,
            instrs: &bytecode[first_instr..next_instr],
            parent: None,
            children: vec![],
        });
    }

    link(0, &mut infos, None, 0, compiled);

    for info in &infos {
        debug_assert!(info.depth != usize::MAX);
    }

    infos
}
diff --git a/lsp/src/diagnostics.rs b/lsp/src/diagnostics.rs
index 5d0c486..61cfff0 100644
--- a/lsp/src/diagnostics.rs
+++ b/lsp/src/diagnostics.rs
@@ -1,9 +1,9 @@
use crate::bqn::{self, BQNResult};
use crate::{bqn::BQNResult, utils::text_range};
use tower_lsp::lsp_types::*;

pub fn get_diagnostics(text: &str) -> Vec<Diagnostic> {
    match bqn::compile(text) {
        Ok(BQNResult::Error { span, error }) => span
pub fn get_diagnostics(compiled: &BQNResult) -> Vec<Diagnostic> {
    match compiled {
        BQNResult::Error { code, span, error } => span
            .chunks(2)
            .map(|r| {
                let span_char_count = if r.len() == 1 {
@@ -12,34 +12,8 @@ pub fn get_diagnostics(text: &str) -> Vec<Diagnostic> {
                    (r[1] - r[0]) as usize + 1
                };

                let line = text
                    .chars()
                    .take(r[0] as usize)
                    .filter(|&c| c == '\n')
                    .collect::<String>()
                    .len() as u32;

                let col_start = text
                    .chars()
                    .take(r[0] as usize + span_char_count)
                    .collect::<String>()
                    .chars()
                    .rev()
                    .take_while(|&c| c != '\n')
                    .map(|c| c.len_utf16())
                    .sum::<usize>() as u32;

                let len = text
                    .chars()
                    .skip(r[0] as usize)
                    .take(span_char_count)
                    .map(|c| c.len_utf16())
                    .sum::<usize>() as u32;

                let from = col_start - len;

                Diagnostic::new(
                    Range::new(Position::new(line, from), Position::new(line, from + len)),
                    text_range(code, r[0] as usize, span_char_count),
                    Some(DiagnosticSeverity::ERROR),
                    None,
                    None,
@@ -49,10 +23,10 @@ pub fn get_diagnostics(text: &str) -> Vec<Diagnostic> {
                )
            })
            .collect::<Vec<_>>(),
        Ok(_) => vec![],
        Err(e) => {
        BQNResult::InternalError(e) => {
            eprintln!("{}", e);
            vec![]
        }
        _ => vec![],
    }
}
diff --git a/lsp/src/highlight.rs b/lsp/src/highlight.rs
new file mode 100644
index 0000000..474756d
--- /dev/null
+++ b/lsp/src/highlight.rs
@@ -0,0 +1,108 @@
use tower_lsp::lsp_types::{DocumentHighlight, DocumentHighlightKind};

use crate::{
    bqn::CompilerResult,
    bytecode::{self, parse_bytecode},
    utils,
};

/// How a particular occurrence of a variable uses it.
#[derive(Debug, PartialEq)]
pub enum Kind {
    /// A non-read reference made from the body that owns the variable
    /// (local depth 0).
    Definition,
    /// A non-read reference made from a nested body (local depth > 0).
    Write,
    /// A read of the variable.
    Read,
}

/// Builds the document highlights for the variable located at source offset
/// `pos`, or returns `None` when `get_variables` finds no variable there.
///
/// Reads are reported as `READ`; definitions and writes are both reported as
/// `TEXT`.
pub fn get_highlight(compiled: &CompilerResult, pos: usize) -> Option<Vec<DocumentHighlight>> {
    let (_, occurrences) = get_variables(compiled, pos)?;

    let mut highlights = Vec::with_capacity(occurrences.len());
    for (instr_pos, kind) in &occurrences {
        let range = utils::opcode_range(compiled, *instr_pos);
        let highlight_kind = match kind {
            Kind::Read => DocumentHighlightKind::READ,
            Kind::Definition | Kind::Write => DocumentHighlightKind::TEXT,
        };
        highlights.push(DocumentHighlight {
            range,
            kind: Some(highlight_kind),
        });
    }

    Some(highlights)
}

/// Finds the variable whose source span contains offset `pos` and collects
/// every instruction that refers to that same variable.
///
/// Returns `(bytecode offset of the instruction under the cursor,
/// [(bytecode offset, kind), ...])`, or `None` when the bytecode cannot be
/// decoded or no variable instruction covers `pos`.
pub fn get_variables(compiled: &CompilerResult, pos: usize) -> Option<(usize, Vec<(usize, Kind)>)> {
    let bytecode = parse_bytecode(0, &compiled.bytecode)?;

    let body_info = bytecode::body_info(&bytecode, compiled);

    // Locate the first variable instruction whose source range contains `pos`.
    let (body, src_instr, src_var) = body_info.iter().find_map(|body| {
        body.instrs.iter().find_map(|instr| {
            if let Some(var_info) = instr.into_body_var(body) {
                // `locs` holds an exclusive (start, end) source range per bytecode position.
                let (s, e) = compiled.locs[instr.pos];
                if !(pos >= s && pos < e) {
                    return None;
                }
                // Slots below `variable_count - variable_ids.len()` have no id
                // of their own; they are the special unnamed variables.
                if var_info.local_depth == 0
                    && var_info.index < body.body.variable_count - body.body.variable_ids.len()
                {
                    // special unnamed variable
                    // Only accept it when the source text at this location really
                    // is a special name (or `_𝕣`).
                    let mut iter = compiled.code.chars().skip(compiled.locs[instr.pos].0);
                    if let Some(c) = iter.next() {
                        if !(utils::special_lower(c)
                            || utils::special_upper(c)
                            || (c == '_' && matches!(iter.next(), Some('𝕣'))))
                        {
                            return None; // this instruction was auto-generated for a header load
                        }
                    }
                }
                return Some((body, instr, var_info));
            }
            None
        })
    })?;

    // eprintln!("highlighted instruction: {src_instr:?}; variable: {src_var:?}");

    let mut vec = vec![];
    // Records every instruction in `body` that resolves to the same slot
    // (same index and same absolute depth) as the variable under the cursor.
    let mut add_body = |body: &bytecode::BodyInfo<'_>| {
        for instr in body.instrs {
            if let Some(curr_var) = instr.into_body_var(body) {
                if src_var.index == curr_var.index && src_var.full_depth == curr_var.full_depth {
                    let kind = if instr.is_var_read() {
                        Kind::Read
                    } else if curr_var.local_depth == 0 {
                        Kind::Definition
                    } else {
                        Kind::Write
                    };
                    vec.push((instr.pos, kind));
                }
            }
        }
    };

    // Applies `add_body` to `body` and then, recursively, to all of its descendants.
    fn add_children(
        body: &bytecode::BodyInfo<'_>,
        add_body: &mut impl FnMut(&bytecode::BodyInfo<'_>),
        body_info: &Vec<bytecode::BodyInfo<'_>>,
    ) {
        add_body(body);
        for child in &body_info[body.idx].children {
            add_children(&body_info[*child], add_body, body_info);
        }
    }

    // Walk up from the body under the cursor until the next parent would be
    // shallower than the variable's defining depth; the scan starts there.
    let mut top = body;
    while let Some(parent_idx) = top.parent {
        let p = &body_info[parent_idx];
        if p.depth < src_var.full_depth {
            break;
        }
        top = p;
    }
    add_children(top, &mut add_body, &body_info);

    Some((src_instr.pos, vec))
}
diff --git a/lsp/src/main.rs b/lsp/src/main.rs
index 95853b0..6efdf27 100644
--- a/lsp/src/main.rs
+++ b/lsp/src/main.rs
@@ -1,20 +1,29 @@
use std::collections::HashMap;

use bqn::BQNResult;
use cbqn::{eval, BQN};
use dashmap::DashMap;
use highlight::Kind;
use tower_lsp::jsonrpc::Result;
use tower_lsp::lsp_types::*;
use tower_lsp::{Client, LanguageServer, LspService, Server};

mod bqn;
mod bytecode;
mod diagnostics;
mod help;
mod highlight;
mod utils;

use diagnostics::get_diagnostics;

type DocumentData = (Vec<String>, BQNResult);

#[derive(Debug)]
struct Backend {
    client: Client,
    system_values: Vec<CompletionItem>,
    documents: DashMap<Url, Vec<String>>,
    documents: DashMap<Url, DocumentData>,
}

impl Backend {
@@ -34,13 +43,114 @@ impl Backend {
    }

    async fn changed_document(&self, uri: Url, text: &str) {
        self.documents
            .insert(uri.clone(), text.lines().map(str::to_owned).collect());
        self.documents.insert(
            uri.clone(),
            (
                text.lines().map(str::to_owned).collect(),
                bqn::compile(text),
            ),
        );

        let get = self.documents.get(&uri).unwrap();
        self.client
            .publish_diagnostics(uri, get_diagnostics(text), None)
            .publish_diagnostics(uri, get_diagnostics(&get.1), None)
            .await;
    }

    /// Produces the highlights for the variable under the cursor.
    ///
    /// Returns `None` when the document is not tracked, the cursor cannot be mapped to an
    /// offset, the stored result is not `BQNResult::Compiled`, or `highlight::get_highlight`
    /// returns `None`.
    fn document_highlight_impl(
        &self,
        p: DocumentHighlightParams,
    ) -> Option<Vec<DocumentHighlight>> {
        let target = &p.text_document_position_params;
        let document = self.documents.get(&target.text_document.uri)?;
        let offset = utils::position_to_offset(&document, &target.position)?;

        // Anything other than the `Compiled` variant has nothing to highlight.
        let (_, BQNResult::Compiled(compiled)) = document.value() else {
            return None;
        };

        highlight::get_highlight(compiled, offset)
    }
    fn rename_impl(&self, p: RenameParams) -> Option<WorkspaceEdit> {
        let contents = self
            .documents
            .get(&p.text_document_position.text_document.uri)?;
        let pos = utils::position_to_offset(&contents, &p.text_document_position.position)?;

        let compiled = match contents.value() {
            (_, BQNResult::Compiled(c)) => c,
            _ => return None,
        };

        let (selected_instr, rest) = highlight::get_variables(compiled, pos)?;
        let prev_name: String = utils::codepoint_substring(compiled, compiled.locs[selected_instr]);

        let new_name = p.new_name;
        // eprintln!("rename {prev_name} → {new_name}");

        let mut map = HashMap::new();

        map.insert(
            p.text_document_position.text_document.uri,
            rest.iter()
                .map(|(pos, _)| {
                    let selected_prev = utils::codepoint_substring(compiled, compiled.locs[*pos]);
                    let new_text = if selected_prev == prev_name {
                        new_name.to_owned()
                    } else {
                        utils::convert_alike(&new_name, &selected_prev)
                    };
                    TextEdit {
                        range: utils::opcode_range(compiled, *pos),
                        new_text,
                    }
                })
                .collect(),
        );

        Some(WorkspaceEdit {
            changes: Some(map),
            document_changes: None,
            change_annotations: None,
        })
    }
    /// Reports the range of the variable under the cursor as the span a rename would replace.
    ///
    /// Returns `None` when the document is not tracked, the position cannot be mapped, the
    /// stored result is not `BQNResult::Compiled`, or no variable is found at the position.
    fn prepare_rename_impl(&self, p: TextDocumentPositionParams) -> Option<PrepareRenameResponse> {
        let document = self.documents.get(&p.text_document.uri)?;
        let offset = utils::position_to_offset(&document, &p.position)?;

        let (_, BQNResult::Compiled(compiled)) = document.value() else {
            return None;
        };

        let (selected_instr, _) = highlight::get_variables(compiled, offset)?;
        let range = utils::opcode_range(compiled, selected_instr);
        Some(PrepareRenameResponse::Range(range))
    }
    /// Locates the first occurrence tagged `Kind::Definition` for the variable under the cursor.
    ///
    /// Returns `None` when the document is not tracked, the position cannot be mapped, the
    /// stored result is not `BQNResult::Compiled`, no variable is found at the position, or
    /// none of its occurrences is a definition.
    fn goto_definition_impl(&self, p: GotoDefinitionParams) -> Option<GotoDefinitionResponse> {
        let target = p.text_document_position_params;
        let document = self.documents.get(&target.text_document.uri)?;
        let offset = utils::position_to_offset(&document, &target.position)?;

        let (_, BQNResult::Compiled(compiled)) = document.value() else {
            return None;
        };

        let (_, occurrences) = highlight::get_variables(compiled, offset)?;

        let (definition, _) = occurrences
            .iter()
            .find(|(_, kind)| *kind == Kind::Definition)?;

        let location = Location::new(
            target.text_document.uri,
            utils::opcode_range(compiled, *definition),
        );
        Some(GotoDefinitionResponse::Scalar(location))
    }
}

#[tower_lsp::async_trait]
@@ -51,7 +161,15 @@ impl LanguageServer for Backend {
                text_document_sync: Some(TextDocumentSyncCapability::Kind(
                    TextDocumentSyncKind::FULL,
                )),
                document_highlight_provider: Some(OneOf::Left(true)),
                hover_provider: Some(HoverProviderCapability::Simple(true)),
                rename_provider: Some(OneOf::Right(RenameOptions {
                    prepare_provider: Some(true),
                    work_done_progress_options: WorkDoneProgressOptions {
                        work_done_progress: Some(false),
                    },
                })),
                definition_provider: Some(OneOf::Left(true)),
                completion_provider: Some(CompletionOptions::default()),
                ..Default::default()
            },
@@ -85,6 +203,30 @@ impl LanguageServer for Backend {
        Ok(Some(CompletionResponse::Array(self.system_values.clone())))
    }

    /// Document-highlight request handler; forwards to `document_highlight_impl` and always
    /// answers `Ok`.
    async fn document_highlight(
        &self,
        p: DocumentHighlightParams,
    ) -> Result<Option<Vec<DocumentHighlight>>> {
        let highlights = self.document_highlight_impl(p);
        Ok(highlights)
    }

    /// Rename request handler; forwards to `rename_impl` and always answers `Ok`.
    async fn rename(&self, p: RenameParams) -> Result<Option<WorkspaceEdit>> {
        let edit = self.rename_impl(p);
        Ok(edit)
    }
    /// Prepare-rename request handler; forwards to `prepare_rename_impl` and always answers
    /// `Ok`.
    async fn prepare_rename(
        &self,
        p: TextDocumentPositionParams,
    ) -> Result<Option<PrepareRenameResponse>> {
        let response = self.prepare_rename_impl(p);
        Ok(response)
    }

    /// Go-to-definition request handler; forwards to `goto_definition_impl` and always answers
    /// `Ok`.
    async fn goto_definition(
        &self,
        p: GotoDefinitionParams,
    ) -> Result<Option<GotoDefinitionResponse>> {
        let response = self.goto_definition_impl(p);
        Ok(response)
    }

    async fn hover(&self, p: HoverParams) -> Result<Option<Hover>> {
        let contents = match self
            .documents
@@ -94,11 +236,11 @@ impl LanguageServer for Backend {
            None => return Ok(None),
        };
        let pos = p.text_document_position_params.position;
        let line = match contents.get(pos.line as usize) {
        let line = match contents.0.get(pos.line as usize) {
            Some(x) => x,
            None => return Ok(None),
        };
        let c = match line.chars().nth(pos.character as usize) {
        let c = match line.chars().nth(utils::line_offset(line, &pos)) {
            Some(x) => x,
            None => return Ok(None),
        };
diff --git a/lsp/src/utils.rs b/lsp/src/utils.rs
new file mode 100644
index 0000000..ba78d21
--- /dev/null
+++ b/lsp/src/utils.rs
@@ -0,0 +1,192 @@
use crate::{bqn::CompilerResult, DocumentData};
use tower_lsp::lsp_types::*;

/// Converts a codepoint index into `text` to a `Position`.
///
/// The line is the number of `'\n'` characters before the index; the character offset is the
/// number of UTF-16 code units between the start of that line and the index. An index past
/// the end of `text` is treated as the end of `text`.
pub fn utf32_to_position(text: &str, idx: usize) -> Position {
    let end = text
        .char_indices()
        .nth(idx)
        .map_or(text.len(), |(byte, _)| byte);
    let prefix = &text[..end];

    // The current line starts right after the last newline, or at the very beginning.
    let line_start = prefix.rfind('\n').map_or(0, |newline| newline + 1);
    let line = prefix.matches('\n').count();
    let offset = prefix[line_start..].encode_utf16().count();

    Position::new(line as u32, offset as u32)
}

/// Builds the `Range` covering `count` codepoints of `text` beginning at codepoint `start`.
pub fn text_range(text: &str, start: usize, count: usize) -> Range {
    let begin = utf32_to_position(text, start);
    let end = utf32_to_position(text, start + count);
    Range::new(begin, end)
}

/// Returns the range in `c.code` covered by the span stored at `c.locs[pos]`.
///
/// The span is read as `(start, end)` in codepoints and its length is taken as `end - start`.
pub fn opcode_range(c: &CompilerResult, pos: usize) -> Range {
    let span = c.locs[pos];
    let length = span.1 - span.0;
    text_range(&c.code, span.0, length)
}

/// Copies the codepoints of `c.code` from index `span.0` (inclusive) to `span.1` (exclusive).
///
/// Yields an empty string when `span.1 <= span.0`; a span running past the end of the code
/// is cut off at the end.
pub fn codepoint_substring(c: &CompilerResult, span: (usize, usize)) -> String {
    let (start, end) = span;
    let length = end.saturating_sub(start);
    c.code.chars().skip(start).take(length).collect()
}

pub fn position_to_offset(a: &DocumentData, p: &Position) -> Option<usize> {
    let ln = p.line as usize;
    if ln >= a.0.len() {
        return None;
    }
    let previous_lines = a.0[0..ln]
        .iter()
        .map(|x| x.chars().count() + 1)
        .sum::<usize>();
    let line_position = line_offset(&a.0[ln], p);
    Some(previous_lines + line_position)
}

/// Counts the leading characters of `line` that fit entirely within `p.character` UTF-16
/// code units.
pub fn line_offset(line: &str, p: &Position) -> usize {
    let limit = p.character as usize;
    let mut units: usize = 0;
    let mut count: usize = 0;
    for ch in line.chars() {
        units += ch.len_utf16();
        // A character that would end past the requested column is not counted.
        if units > limit {
            break;
        }
        count += 1;
    }
    count
}

/// Letter-case style of a name's core, as classified by [`bqn_name_info`].
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum Case {
    Lower,     // helloworld
    Upper,     // HELLOWORLD
    Dromedary, // helloWorld
    Pascal,    // HelloWorld
}

/// Decomposition of a name into underscore markers, core text and case style.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NameInfo {
    head_underscore: bool,
    tail_underscore: bool,
    core: String, // leading & trailing underscores removed
    case: Case,
}

/// Splits `s` into its leading/trailing underscores and core, and classifies the core's case.
///
/// Trailing underscores are only stripped when the name also starts with an underscore.
/// A core of at least two characters is `Upper` when it has no lowercase character and
/// `Lower` when it has no uppercase character; otherwise the first character decides between
/// `Pascal` and `Dromedary`.
///
/// Never panics: an empty name, or one made only of underscores, yields an empty core with
/// `Case::Dromedary` (all underscores are then counted as leading ones).
pub fn bqn_name_info(s: &str) -> NameInfo {
    let bytes = s.as_bytes();
    let head_underscores = bytes.iter().take_while(|&&b| b == b'_').count();
    let tail_underscores = if head_underscores == 0 {
        0
    } else {
        // Cap the count so an all-underscore name is not consumed from both ends, which
        // would place the end of the core before its start.
        bytes
            .iter()
            .rev()
            .take_while(|&&b| b == b'_')
            .count()
            .min(bytes.len() - head_underscores)
    };

    // `_` is a single ASCII byte, so both cut points fall on character boundaries.
    let core = &s[head_underscores..s.len() - tail_underscores];

    let count = core.chars().count();

    let case = if count > 1 && !core.chars().any(char::is_lowercase) {
        Case::Upper
    } else if count > 1 && !core.chars().any(char::is_uppercase) {
        Case::Lower
    } else if matches!(core.chars().next(), Some(first) if first.is_uppercase()) {
        Case::Pascal
    } else {
        // Also covers the empty core, which has no first character to inspect.
        Case::Dromedary
    };

    NameInfo {
        head_underscore: head_underscores > 0,
        tail_underscore: tail_underscores > 0,
        core: core.to_owned(),
        case,
    }
}

/// True for characters in the range `'𝕒'..='𝕫'`.
pub fn special_lower(c: char) -> bool {
    matches!(c, '𝕒'..='𝕫')
}
/// True for characters in the range `'𝔸'..='𝕑'`.
pub fn special_upper(c: char) -> bool {
    matches!(c, '𝔸'..='𝕑')
}

/// Uppercases `c`. A character accepted by [`special_lower`] is first shifted down by the
/// distance between `'𝕒'` and `'𝔸'`; everything else goes straight to `char::to_uppercase`.
pub fn to_uppercase(c: char) -> impl Iterator<Item = char> {
    if special_lower(c) {
        char::from_u32((c as u32) - ('𝕒' as u32 - '𝔸' as u32))
            .unwrap()
            .to_uppercase()
    } else {
        c.to_uppercase()
    }
}
/// Lowercases `c`. A character accepted by [`special_upper`] is first shifted up by the
/// distance between `'𝕒'` and `'𝔸'`; everything else goes straight to `char::to_lowercase`.
pub fn to_lowercase(c: char) -> impl Iterator<Item = char> {
    if special_upper(c) {
        char::from_u32((c as u32) + ('𝕒' as u32 - '𝔸' as u32))
            .unwrap()
            .to_lowercase()
    } else {
        c.to_lowercase()
    }
}

/// Rewrites `name` so that its underscores and case follow those of `style`.
///
/// `name` is returned unchanged when it equals `style`, when both already share the same
/// underscore markers and case, or when `name` has an empty core (nothing to restyle).
/// Otherwise the result is the core of `name`, re-cased towards the case of `style` and
/// wrapped in the leading/trailing underscores that `style` has.
pub fn convert_alike(name: &str, style: &str) -> String {
    if name == style {
        return name.to_owned();
    }

    let style_info = bqn_name_info(style);
    let name_info = bqn_name_info(name);

    // An empty or all-underscore name has no letters to re-case; hand it back as typed
    // instead of panicking on a missing first character.
    if name_info.core.is_empty() {
        return name.to_owned();
    }

    if style_info.head_underscore == name_info.head_underscore
        && style_info.tail_underscore == name_info.tail_underscore
        && style_info.case == name_info.case
    {
        return name.to_owned();
    }

    let mut res: String = String::new();
    if style_info.head_underscore {
        res.push('_');
    }

    match (name_info.case, style_info.case) {
        // Same case, only the underscores differ.
        (a, b) if a == b => {
            res.push_str(&name_info.core);
        }
        (_, Case::Upper) | (Case::Upper, Case::Pascal) => {
            res.extend(name_info.core.chars().flat_map(to_uppercase));
        }
        (Case::Upper, Case::Dromedary) | (_, Case::Lower) => {
            res.extend(name_info.core.chars().flat_map(to_lowercase));
        }
        (_, Case::Pascal) => {
            let mut chars = name_info.core.chars();
            if let Some(first) = chars.next() {
                res.extend(to_uppercase(first).chain(chars));
            }
        }
        (_, Case::Dromedary) => {
            let mut chars = name_info.core.chars();
            if let Some(first) = chars.next() {
                res.extend(to_lowercase(first).chain(chars));
            }
        }
    }

    if style_info.tail_underscore {
        res.push('_');
    }
    res
}
-- 
2.45.2

[PATCH bqnlsp 5/7] Implement go to references Export this patch

From: dzaima <dzaimagit@gmail.com>

---
 lsp/src/main.rs | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/lsp/src/main.rs b/lsp/src/main.rs
index 6efdf27..7d7e643 100644
--- a/lsp/src/main.rs
+++ b/lsp/src/main.rs
@@ -151,6 +151,32 @@ impl Backend {
            utils::opcode_range(compiled, def),
        )))
    }
    /// Lists the locations of every occurrence of the variable under the cursor.
    ///
    /// Occurrences tagged `Kind::Definition` are included only when the client sets
    /// `context.include_declaration`; otherwise they are left out.
    ///
    /// Returns `None` when the document is not tracked, the position cannot be mapped, the
    /// stored result is not `BQNResult::Compiled`, or no variable is found at the position.
    fn references_impl(&self, p: ReferenceParams) -> Option<Vec<Location>> {
        let target = &p.text_document_position;
        let contents = self.documents.get(&target.text_document.uri)?;
        let pos = utils::position_to_offset(&contents, &target.position)?;

        let compiled = match contents.value() {
            (_, BQNResult::Compiled(c)) => c,
            _ => return None,
        };

        let (_, rest) = highlight::get_variables(compiled, pos)?;

        // Honour the client's request instead of unconditionally dropping definitions.
        let include_declaration = p.context.include_declaration;

        let locations = rest
            .iter()
            .filter(|(_pos, kind)| include_declaration || *kind != Kind::Definition)
            .map(|(pos, _kind)| {
                Location::new(
                    target.text_document.uri.clone(),
                    utils::opcode_range(compiled, *pos),
                )
            })
            .collect();

        Some(locations)
    }
}

#[tower_lsp::async_trait]
@@ -169,6 +195,7 @@ impl LanguageServer for Backend {
                        work_done_progress: Some(false),
                    },
                })),
                references_provider: Some(OneOf::Left(true)),
                definition_provider: Some(OneOf::Left(true)),
                completion_provider: Some(CompletionOptions::default()),
                ..Default::default()
@@ -226,6 +253,9 @@ impl LanguageServer for Backend {
    ) -> Result<Option<GotoDefinitionResponse>> {
        Ok(self.goto_definition_impl(p))
    }
    /// References request handler; forwards to `references_impl` and always answers `Ok`.
    async fn references(&self, p: ReferenceParams) -> Result<Option<Vec<Location>>> {
        let locations = self.references_impl(p);
        Ok(locations)
    }

    async fn hover(&self, p: HoverParams) -> Result<Option<Hover>> {
        let contents = match self
-- 
2.45.2

[PATCH bqnlsp 6/7] Implement symbol listing Export this patch

From: dzaima <dzaimagit@gmail.com>

---
 lsp/src/main.rs  | 52 ++++++++++++++++++++++++++++++++++++++++++++++++
 lsp/src/utils.rs |  8 ++++----
 2 files changed, 56 insertions(+), 4 deletions(-)

diff --git a/lsp/src/main.rs b/lsp/src/main.rs
index 7d7e643..627d71f 100644
--- a/lsp/src/main.rs
+++ b/lsp/src/main.rs
@@ -16,6 +16,7 @@ mod highlight;
mod utils;

use diagnostics::get_diagnostics;
use utils::Case;

type DocumentData = (Vec<String>, BQNResult);

@@ -177,6 +178,50 @@ impl Backend {

        Some(def)
    }
    /// Lists one symbol per instruction for which `is_var_mut()` holds, across all bodies.
    ///
    /// The symbol name is the source text of the instruction's span. Names with a leading
    /// underscore are reported as `OPERATOR`, names classified `Upper` or `Pascal` as
    /// `FUNCTION`, and everything else as `VARIABLE`. The list is flat: no symbol has children.
    ///
    /// Returns `None` when the document is not tracked, the stored result is not
    /// `BQNResult::Compiled`, or the bytecode cannot be parsed.
    fn symbols_impl(&self, p: DocumentSymbolParams) -> Option<DocumentSymbolResponse> {
        let document = self.documents.get(&p.text_document.uri)?;

        let (_, BQNResult::Compiled(compiled)) = document.value() else {
            return None;
        };

        let bytecode = bytecode::parse_bytecode(0, &compiled.bytecode)?;
        let body_info = bytecode::body_info(&bytecode, compiled);

        // Spans are codepoint-based, so index into a codepoint vector rather than the string.
        let codepoints: Vec<char> = compiled.code.chars().collect();
        let mut symbols: Vec<DocumentSymbol> = Vec::new();
        for body in body_info {
            for instr in body.instrs {
                if !instr.is_var_mut() {
                    continue;
                }

                let (start, end) = compiled.locs[instr.pos];
                let name: String = codepoints[start..end].iter().collect();
                let range = utils::opcode_range(compiled, instr.pos);
                let name_info = utils::bqn_name_info(&name);

                let kind = if name_info.head_underscore {
                    SymbolKind::OPERATOR
                } else if matches!(name_info.case, Case::Upper | Case::Pascal) {
                    SymbolKind::FUNCTION
                } else {
                    SymbolKind::VARIABLE
                };

                #[allow(deprecated)]
                symbols.push(DocumentSymbol {
                    name,
                    detail: None,
                    kind,
                    tags: None,
                    deprecated: None,
                    range,
                    selection_range: range,
                    children: None,
                });
            }
        }

        Some(DocumentSymbolResponse::Nested(symbols))
    }
}

#[tower_lsp::async_trait]
@@ -196,6 +241,7 @@ impl LanguageServer for Backend {
                    },
                })),
                references_provider: Some(OneOf::Left(true)),
                document_symbol_provider: Some(OneOf::Left(true)),
                definition_provider: Some(OneOf::Left(true)),
                completion_provider: Some(CompletionOptions::default()),
                ..Default::default()
@@ -256,6 +302,12 @@ impl LanguageServer for Backend {
    async fn references(&self, p: ReferenceParams) -> Result<Option<Vec<Location>>> {
        Ok(self.references_impl(p))
    }
    /// Document-symbol request handler; forwards to `symbols_impl` and always answers `Ok`.
    async fn document_symbol(
        &self,
        p: DocumentSymbolParams,
    ) -> Result<Option<DocumentSymbolResponse>> {
        let symbols = self.symbols_impl(p);
        Ok(symbols)
    }

    async fn hover(&self, p: HoverParams) -> Result<Option<Hover>> {
        let contents = match self
diff --git a/lsp/src/utils.rs b/lsp/src/utils.rs
index ba78d21..f00f3cb 100644
--- a/lsp/src/utils.rs
+++ b/lsp/src/utils.rs
@@ -65,10 +65,10 @@ pub enum Case {
    Pascal,    // HelloWorld
}
pub struct NameInfo {
    head_underscore: bool,
    tail_underscore: bool,
    core: String, // leading & trailing undescrores removed
    case: Case,
    pub head_underscore: bool,
    pub tail_underscore: bool,
    pub core: String, // leading & trailing undescrores removed
    pub case: Case,
}

pub fn bqn_name_info(s: &str) -> NameInfo {
-- 
2.45.2

[PATCH bqnlsp 7/7] Optimized position conversion Export this patch

From: dzaima <dzaimagit@gmail.com>

---
 lsp/src/diagnostics.rs | 42 ++++++++++++------------
 lsp/src/highlight.rs   | 10 ++++--
 lsp/src/main.rs        | 23 ++++++++------
 lsp/src/utils.rs       | 72 ++++++++++++++++++++++++++++--------------
 4 files changed, 91 insertions(+), 56 deletions(-)

diff --git a/lsp/src/diagnostics.rs b/lsp/src/diagnostics.rs
index 61cfff0..abb13bb 100644
--- a/lsp/src/diagnostics.rs
+++ b/lsp/src/diagnostics.rs
@@ -1,28 +1,30 @@
use crate::{bqn::BQNResult, utils::text_range};
use crate::{bqn::BQNResult, utils::PositionConverter};
use tower_lsp::lsp_types::*;

pub fn get_diagnostics(compiled: &BQNResult) -> Vec<Diagnostic> {
    match compiled {
        BQNResult::Error { code, span, error } => span
            .chunks(2)
            .map(|r| {
                let span_char_count = if r.len() == 1 {
                    1
                } else {
                    (r[1] - r[0]) as usize + 1
                };
        BQNResult::Error { code, span, error } => {
            let pc = PositionConverter::new(code);
            span.chunks(2)
                .map(|r| {
                    let span_char_count = if r.len() == 1 {
                        1
                    } else {
                        (r[1] - r[0]) as usize + 1
                    };

                Diagnostic::new(
                    text_range(code, r[0] as usize, span_char_count),
                    Some(DiagnosticSeverity::ERROR),
                    None,
                    None,
                    error.clone(),
                    None,
                    None,
                )
            })
            .collect::<Vec<_>>(),
                    Diagnostic::new(
                        pc.text_range(r[0] as usize, span_char_count),
                        Some(DiagnosticSeverity::ERROR),
                        None,
                        None,
                        error.clone(),
                        None,
                        None,
                    )
                })
                .collect::<Vec<_>>()
        }
        BQNResult::InternalError(e) => {
            eprintln!("{}", e);
            vec![]
diff --git a/lsp/src/highlight.rs b/lsp/src/highlight.rs
index 474756d..4b7de33 100644
--- a/lsp/src/highlight.rs
+++ b/lsp/src/highlight.rs
@@ -3,7 +3,7 @@ use tower_lsp::lsp_types::{DocumentHighlight, DocumentHighlightKind};
use crate::{
    bqn::CompilerResult,
    bytecode::{self, parse_bytecode},
    utils,
    utils::{self, PositionConverter},
};

#[derive(Debug, PartialEq)]
@@ -13,13 +13,17 @@ pub enum Kind {
    Read,
}

pub fn get_highlight(compiled: &CompilerResult, pos: usize) -> Option<Vec<DocumentHighlight>> {
pub fn get_highlight(
    compiled: &CompilerResult,
    pc: &PositionConverter,
    pos: usize,
) -> Option<Vec<DocumentHighlight>> {
    Some(
        get_variables(compiled, pos)?
            .1
            .iter()
            .map(|(pos, kind)| DocumentHighlight {
                range: utils::opcode_range(compiled, *pos),
                range: pc.opcode_range(compiled, *pos),
                kind: Some(if *kind == Kind::Read {
                    DocumentHighlightKind::READ
                } else {
diff --git a/lsp/src/main.rs b/lsp/src/main.rs
index 627d71f..bb6761c 100644
--- a/lsp/src/main.rs
+++ b/lsp/src/main.rs
@@ -16,7 +16,7 @@ mod highlight;
mod utils;

use diagnostics::get_diagnostics;
use utils::Case;
use utils::{Case, PositionConverter};

type DocumentData = (Vec<String>, BQNResult);

@@ -71,8 +71,9 @@ impl Backend {
            (_, BQNResult::Compiled(c)) => c,
            _ => return None,
        };
        let pc = PositionConverter::new(&compiled.code);

        highlight::get_highlight(compiled, pos)
        highlight::get_highlight(compiled, &pc, pos)
    }
    fn rename_impl(&self, p: RenameParams) -> Option<WorkspaceEdit> {
        let contents = self
@@ -84,6 +85,7 @@ impl Backend {
            (_, BQNResult::Compiled(c)) => c,
            _ => return None,
        };
        let pc = PositionConverter::new(&compiled.code);

        let (selected_instr, rest) = highlight::get_variables(compiled, pos)?;
        let prev_name: String = utils::codepoint_substring(compiled, compiled.locs[selected_instr]);
@@ -104,7 +106,7 @@ impl Backend {
                        utils::convert_alike(&new_name, &selected_prev)
                    };
                    TextEdit {
                        range: utils::opcode_range(compiled, *pos),
                        range: pc.opcode_range(compiled, *pos),
                        new_text,
                    }
                })
@@ -127,10 +129,9 @@ impl Backend {
        };

        let (selected_instr, _) = highlight::get_variables(compiled, pos)?;
        Some(PrepareRenameResponse::Range(utils::opcode_range(
            compiled,
            selected_instr,
        )))
        Some(PrepareRenameResponse::Range(
            PositionConverter::new(&compiled.code).opcode_range(compiled, selected_instr),
        ))
    }
    fn goto_definition_impl(&self, p: GotoDefinitionParams) -> Option<GotoDefinitionResponse> {
        let contents = self
@@ -149,7 +150,7 @@ impl Backend {

        Some(GotoDefinitionResponse::Scalar(Location::new(
            p.text_document_position_params.text_document.uri,
            utils::opcode_range(compiled, def),
            PositionConverter::new(&compiled.code).opcode_range(compiled, def),
        )))
    }
    fn references_impl(&self, p: ReferenceParams) -> Option<Vec<Location>> {
@@ -162,6 +163,7 @@ impl Backend {
            (_, BQNResult::Compiled(c)) => c,
            _ => return None,
        };
        let pc = PositionConverter::new(&compiled.code);

        let (_, rest) = highlight::get_variables(compiled, pos)?;

@@ -171,7 +173,7 @@ impl Backend {
            .map(|(pos, _kind)| {
                Location::new(
                    p.text_document_position.text_document.uri.clone(),
                    utils::opcode_range(compiled, *pos),
                    pc.opcode_range(compiled, *pos),
                )
            })
            .collect();
@@ -188,6 +190,7 @@ impl Backend {

        let bytecode = bytecode::parse_bytecode(0, &compiled.bytecode)?;
        let body_info = bytecode::body_info(&bytecode, compiled);
        let pc = PositionConverter::new(&compiled.code);

        let chars = compiled.code.chars().collect::<Vec<_>>();
        let mut r: Vec<DocumentSymbol> = Vec::new();
@@ -196,7 +199,7 @@ impl Backend {
                if instr.is_var_mut() {
                    let (s, e) = compiled.locs[instr.pos];
                    let name = chars[s..e].iter().collect::<String>();
                    let range = utils::opcode_range(compiled, instr.pos);
                    let range = pc.opcode_range(compiled, instr.pos);
                    let name_info = utils::bqn_name_info(&name);

                    #[allow(deprecated)]
diff --git a/lsp/src/utils.rs b/lsp/src/utils.rs
index f00f3cb..ebdface 100644
--- a/lsp/src/utils.rs
+++ b/lsp/src/utils.rs
@@ -1,33 +1,59 @@
use std::cell::OnceCell;

use crate::{bqn::CompilerResult, DocumentData};
use tower_lsp::lsp_types::*;

pub fn utf32_to_position(text: &str, idx: usize) -> Position {
    let byte_pos = match text.char_indices().nth(idx) {
        Some(pos) => pos.0,
        None => text.len(),
    };

    let line = text[..byte_pos].bytes().filter(|&c| c == b'\n').count();
    let offset = text[..byte_pos]
        .chars()
        .rev()
        .take_while(|&c| c != '\n')
        .map(|c| c.len_utf16())
        .sum::<usize>();

    Position::new(line as u32, offset as u32)
/// Converts codepoint indices into `text` to LSP positions, using a per-line index that is
/// computed once, on first use.
pub struct PositionConverter<'a> {
    // Source text that the codepoint indices refer to.
    text: &'a str,
    // Filled in lazily by `line_info()`.
    line_info: OnceCell<Vec<(usize, usize)>>, // (codepoint index, offset in text) per line
}

pub fn text_range(text: &str, start: usize, count: usize) -> Range {
    Range::new(
        utf32_to_position(text, start),
        utf32_to_position(text, start + count),
    )
impl PositionConverter<'_> {
    /// Returns the per-line table, building it on the first call.
    ///
    /// Each entry is `(codepoint index, byte offset)` of the first character of a line. The
    /// first entry is always `(0, 0)`; every `'\n'` in the text adds an entry pointing just
    /// past it.
    fn line_info(&self) -> &[(usize, usize)] {
        self.line_info.get_or_init(|| {
            let after_newlines = self
                .text
                .char_indices()
                .enumerate()
                .filter_map(|(codepoint, (off, c))| {
                    // `'\n'` is one byte long, so the next line starts at `off + 1`.
                    (c == '\n').then_some((codepoint + 1, off + 1))
                });
            std::iter::once((0, 0)).chain(after_newlines).collect()
        })
    }
}

pub fn opcode_range(c: &CompilerResult, pos: usize) -> Range {
    let (s, e) = c.locs[pos];
    text_range(&c.code, s, e - s)
impl<'a> PositionConverter<'a> {
    /// Creates a converter over `text`; the line table is not built until it is first needed.
    pub fn new(text: &'a str) -> Self {
        let line_info = OnceCell::new();
        Self { text, line_info }
    }

    /// Converts a codepoint index into a `Position` whose character offset is counted in
    /// UTF-16 code units from the start of the line.
    pub fn codepoint_index_to_pos(&self, idx: usize) -> Position {
        let lines = self.line_info();
        // Last line starting at or before `idx`. The table always begins with `(0, 0)`, so
        // the partition point is at least 1 and the subtraction cannot underflow.
        let row = lines.partition_point(|&(first, _)| first <= idx) - 1;
        let (first_codepoint, first_byte) = lines[row];

        let column: usize = self.text[first_byte..]
            .chars()
            .take(idx - first_codepoint)
            .map(char::len_utf16)
            .sum();

        Position::new(row as u32, column as u32)
    }

    /// Builds the `Range` covering `count` codepoints beginning at codepoint `start`.
    pub fn text_range(&self, start: usize, count: usize) -> Range {
        let begin = self.codepoint_index_to_pos(start);
        let end = self.codepoint_index_to_pos(start + count);
        Range::new(begin, end)
    }

    /// Returns the range covered by the span stored at `c.locs[pos]`, read as `(start, end)`.
    pub fn opcode_range(&self, c: &CompilerResult, pos: usize) -> Range {
        let span = c.locs[pos];
        let length = span.1 - span.0;
        self.text_range(span.0, length)
    }
}

pub fn codepoint_substring(c: &CompilerResult, span: (usize, usize)) -> String {
-- 
2.45.2