{
  "_id": "6a20768bcd65a98ecbd13fed",
  "Package": "openalexVectorComp",
  "Type": "Package",
  "Title": "Embedding Vectorization and Distance-Based Scoring Workflows",
  "Version": "0.3.3",
  "Authors@R": "c(\nperson(given = \"Rainer M\", family = \"Krug\", role = c(\"aut\", \"cre\"), email = \"Rainer@krugs.de\"),\nperson(given = \"ChatGPT\", family = \"Assistant\", role = \"ctb\")\n)",
  "Author": "Rainer M Krug [aut, cre], ChatGPT Assistant [ctb]",
  "Maintainer": "Rainer M Krug <Rainer@krugs.de>",
  "Description": "R-first orchestration for text vectorization (embeddings),\nembedding distance computation, and distance-based scoring\nworkflows. Supports backend-neutral embedding providers (HF,\nOpenAI, TEI), prototype cosine-distance scoring, reference-area\ndistance scoring, and threshold calibration utilities.",
  "License": "MIT + file LICENSE",
  "Encoding": "UTF-8",
  "Config/testthat/edition": "3",
  "VignetteBuilder": "knitr, quarto",
  "URL": "https://github.com/openalexPro/openalexVectorComp,\nhttps://openalexpro.github.io/openalexVectorComp/,\nhttps://doi.org/10.5281/zenodo.19607514",
  "BugReports": "https://github.com/openalexPro/openalexVectorComp/issues",
  "RoxygenNote": "7.3.3",
  "Roxygen": "list(markdown = TRUE)",
  "Config/roxygen2/version": "8.0.0",
  "Config/pak/sysreqs": "cmake libssl-dev",
  "Repository": "https://openalexpro.r-universe.dev",
  "Date/Publication": "2026-06-03 13:46:51 UTC",
  "RemoteUrl": "https://github.com/openalexPro/openalexVectorComp",
  "RemoteRef": "main",
  "RemoteSha": "e2eb96f95ddd6623047bf929ed540bb770bcac03",
  "NeedsCompilation": "no",
  "Packaged": {
    "Date": "2026-06-03 18:32:22 UTC",
    "User": "root"
  },
  "MD5sum": "b1940ddacecbce17838bb68b1b47e917",
  "_user": "openalexpro",
  "_type": "src",
  "_file": "openalexVectorComp_0.3.3.tar.gz",
  "_fileid": "26f288e3dae8ff93b499a15caef29edaa68337e1bed2a16a502464beecd318e0",
  "_filesize": 346317,
  "_sha256": "26f288e3dae8ff93b499a15caef29edaa68337e1bed2a16a502464beecd318e0",
  "_created": "2026-06-03T18:32:22.000Z",
  "_published": "2026-06-03T18:46:35.545Z",
  "_distro": "noble",
  "_jobs": [
    {
      "job": 79367530528,
      "time": 163,
      "config": "linux-devel-x86_64",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "7393499853"
    },
    {
      "job": 79367530526,
      "time": 186,
      "config": "linux-release-x86_64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7393508274"
    },
    {
      "job": 79367530584,
      "time": 162,
      "config": "macos-oldrel-arm64",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "7393735730"
    },
    {
      "job": 79367530579,
      "time": 181,
      "config": "macos-release-arm64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7393652842"
    },
    {
      "job": 79366926377,
      "time": 184,
      "config": "source",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7393438126"
    },
    {
      "job": 79367530540,
      "time": 140,
      "config": "wasm-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7393491993"
    },
    {
      "job": 79367530647,
      "time": 170,
      "config": "windows-devel",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "7393502488"
    },
    {
      "job": 79367530684,
      "time": 198,
      "config": "windows-oldrel",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "7393512406"
    },
    {
      "job": 79367530588,
      "time": 183,
      "config": "windows-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7393507146"
    }
  ],
  "_buildurl": "https://github.com/r-universe/openalexpro/actions/runs/26904863901",
  "_status": "success",
  "_host": "GitHub-Actions",
  "_upstream": "https://github.com/openalexPro/openalexVectorComp",
  "_commit": {
    "id": "e2eb96f95ddd6623047bf929ed540bb770bcac03",
    "author": "Rainer M Krug <rkrug@users.noreply.github.com>",
    "committer": "GitHub <noreply@github.com>",
    "message": "Merge pull request #9 from openalexPro/claude/specter2-support\n\nrelease: v0.3.3 add SPECTER2 + TEI support",
    "time": 1780494411
  },
  "_maintainer": {
    "name": "Rainer M Krug",
    "email": "rainer@krugs.de",
    "login": "rkrug",
    "description": "",
    "uuid": 487172
  },
  "_registered": true,
  "_dependencies": [
    {
      "package": "R",
      "version": ">= 4.1.0",
      "role": "Depends"
    },
    {
      "package": "cli",
      "role": "Imports"
    },
    {
      "package": "httr2",
      "role": "Imports"
    },
    {
      "package": "dplyr",
      "role": "Imports"
    },
    {
      "package": "arrow",
      "role": "Imports"
    },
    {
      "package": "digest",
      "role": "Imports"
    },
    {
      "package": "yaml",
      "role": "Imports"
    },
    {
      "package": "uwot",
      "role": "Imports"
    },
    {
      "package": "stats",
      "role": "Imports"
    },
    {
      "package": "utils",
      "role": "Imports"
    },
    {
      "package": "ggplot2",
      "role": "Imports"
    },
    {
      "package": "jsonlite",
      "role": "Imports"
    },
    {
      "package": "knitr",
      "role": "Suggests"
    },
    {
      "package": "quarto",
      "role": "Suggests"
    },
    {
      "package": "roxygen2",
      "role": "Suggests"
    },
    {
      "package": "rmarkdown",
      "role": "Suggests"
    },
    {
      "package": "keyring",
      "role": "Suggests"
    },
    {
      "package": "testthat",
      "version": ">= 3.0.0",
      "role": "Suggests"
    }
  ],
  "_owner": "openalexpro",
  "_selfowned": true,
  "_usedby": 0,
  "_updates": [
    {
      "week": "2025-45",
      "n": 1
    },
    {
      "week": "2026-03",
      "n": 2
    },
    {
      "week": "2026-11",
      "n": 2
    },
    {
      "week": "2026-12",
      "n": 3
    },
    {
      "week": "2026-13",
      "n": 1
    },
    {
      "week": "2026-14",
      "n": 2
    },
    {
      "week": "2026-22",
      "n": 7
    },
    {
      "week": "2026-23",
      "n": 2
    }
  ],
  "_tags": [
    {
      "name": "v0.3.0",
      "date": "2026-04-01"
    },
    {
      "name": "v0.3.1",
      "date": "2026-05-29"
    },
    {
      "name": "v0.3.2",
      "date": "2026-05-29"
    }
  ],
  "_stars": 0,
  "_contributors": [
    {
      "user": "rkrug",
      "count": 35,
      "uuid": 487172
    }
  ],
  "_userbio": {
    "uuid": 288549264,
    "type": "organization",
    "name": "openalexPro",
    "description": "Tools to work with OpenAlex data, API as well as snapshot"
  },
  "_downloads": {
    "count": 0,
    "source": "https://cranlogs.r-pkg.org/downloads/total/last-month/openalexVectorComp"
  },
  "_devurl": "https://github.com/openalexpro/openalexvectorcomp",
  "_pkgdown": "https://openalexpro.github.io/openalexVectorComp/",
  "_quarto": true,
  "_topics": [
    "quarto"
  ],
  "_rbuild": "4.6.0",
  "_assets": [
    "extra/citation.cff",
    "extra/citation.html",
    "extra/citation.json",
    "extra/citation.txt",
    "extra/contents.json",
    "extra/NEWS.html",
    "extra/NEWS.txt",
    "extra/openalexVectorComp.html",
    "extra/readme.html",
    "extra/readme.md",
    "manual.pdf"
  ],
  "_cranurl": false,
  "_exports": [
    "backend_config",
    "backend_embed_texts",
    "backend_info",
    "backend_read",
    "backend_save",
    "backend_specter2_tei",
    "batch_collect_openai",
    "batch_status_openai",
    "batch_submit_openai",
    "calibrate_threshold",
    "clean_abstract_for_embedding",
    "demo_finalize_openai_batch",
    "distance_cosine",
    "distance_reference_cosine",
    "distance_ridge",
    "embed_corpus",
    "embed_texts",
    "fit_ridge",
    "plot_embeddings_pca",
    "plot_embeddings_umap",
    "run_demo_openai",
    "run_demo_openalex",
    "score_reference_cosine",
    "score_ridge",
    "similarity_cosine"
  ],
  "_help": [
    {
      "page": "backend_config",
      "title": "Build embedding backend configuration",
      "topics": [
        "backend_config"
      ]
    },
    {
      "page": "backend_embed_texts",
      "title": "Embed texts via configured backend",
      "topics": [
        "backend_embed_texts"
      ]
    },
    {
      "page": "backend_info",
      "title": "Get embedding backend model/service information",
      "topics": [
        "backend_info"
      ]
    },
    {
      "page": "backend_read",
      "title": "Read backend configuration from YAML",
      "topics": [
        "backend_read"
      ]
    },
    {
      "page": "backend_save",
      "title": "Save backend configuration to YAML",
      "topics": [
        "backend_save"
      ]
    },
    {
      "page": "backend_specter2_tei",
      "title": "Backend preset for a local TEI server serving the merged SPECTER2 proximity model",
      "topics": [
        "backend_specter2_tei"
      ]
    },
    {
      "page": "batch_collect_openai",
      "title": "Collect completed OpenAI batch embedding jobs",
      "topics": [
        "batch_collect_openai"
      ]
    },
    {
      "page": "batch_status_openai",
      "title": "Inspect OpenAI batch state for a label",
      "topics": [
        "batch_status_openai"
      ]
    },
    {
      "page": "batch_submit_openai",
      "title": "Submit OpenAI Batch jobs for corpus embeddings (asynchronous)",
      "topics": [
        "batch_submit_openai"
      ]
    },
    {
      "page": "calibrate_threshold",
      "title": "Calibrate threshold from Parquet scores by streaming batches",
      "topics": [
        "calibrate_threshold"
      ]
    },
    {
      "page": "clean_abstract_for_embedding",
      "title": "Clean title/abstract rows into embedding-ready text",
      "topics": [
        "clean_abstract_for_embedding"
      ]
    },
    {
      "page": "demo_finalize_openai_batch",
      "title": "Finalize OpenAI demo batch jobs and compare direct vs batch embeddings",
      "topics": [
        "demo_finalize_openai_batch"
      ]
    },
    {
      "page": "distance_cosine",
      "title": "Cosine distance between two numeric vectors",
      "topics": [
        "distance_cosine"
      ]
    },
    {
      "page": "distance_reference_cosine",
      "title": "Pairwise cosine distances with centroid axis between label partitions",
      "topics": [
        "distance_reference_cosine"
      ]
    },
    {
      "page": "distance_ridge",
      "title": "Compute corpus distance to a reference embedding area",
      "topics": [
        "distance_ridge"
      ]
    },
    {
      "page": "distances",
      "title": "Join prototype and ridge distances lazily via Arrow",
      "topics": [
        "distances"
      ]
    },
    {
      "page": "embed_corpus",
      "title": "Stream a corpus dataset, embed in batches, and write Parquets",
      "topics": [
        "embed_corpus"
      ]
    },
    {
      "page": "embed_texts",
      "title": "Embed texts through a configured backend",
      "topics": [
        "embed_texts"
      ]
    },
    {
      "page": "fit_ridge",
      "title": "Fit a reference-area model from embeddings parquet",
      "topics": [
        "fit_ridge"
      ]
    },
    {
      "page": "plot_embeddings_pca",
      "title": "Plot embeddings via PCA, colored by arbitrary labels",
      "topics": [
        "plot_embeddings_pca"
      ]
    },
    {
      "page": "plot_embeddings_umap",
      "title": "Plot embeddings via UMAP, colored by arbitrary labels",
      "topics": [
        "plot_embeddings_umap"
      ]
    },
    {
      "page": "run_demo_openai",
      "title": "Create and optionally run an OpenAI-based demo project via Quarto",
      "topics": [
        "run_demo_openai"
      ]
    },
    {
      "page": "run_demo_openalex",
      "title": "Create and optionally run a self-contained demo project via Quarto",
      "topics": [
        "run_demo_openalex"
      ]
    },
    {
      "page": "score_reference_cosine",
      "title": "Convert reference-cosine distances to scores",
      "topics": [
        "score_reference_cosine"
      ]
    },
    {
      "page": "score_ridge",
      "title": "Convert ridge distances to ridge scores",
      "topics": [
        "score_ridge"
      ]
    },
    {
      "page": "similarity_cosine",
      "title": "Cosine similarity between two numeric vectors",
      "topics": [
        "similarity_cosine"
      ]
    }
  ],
  "_readme": "https://github.com/openalexPro/openalexVectorComp/raw/main/README.md",
  "_rundeps": [
    "arrow",
    "askpass",
    "assertthat",
    "BH",
    "bit",
    "bit64",
    "cli",
    "cpp11",
    "curl",
    "digest",
    "dplyr",
    "dqrng",
    "farver",
    "FNN",
    "generics",
    "ggplot2",
    "glue",
    "gtable",
    "httr2",
    "irlba",
    "isoband",
    "jsonlite",
    "labeling",
    "lattice",
    "lifecycle",
    "magrittr",
    "Matrix",
    "openssl",
    "pillar",
    "pkgconfig",
    "purrr",
    "R6",
    "rappdirs",
    "RColorBrewer",
    "Rcpp",
    "RcppAnnoy",
    "RcppEigen",
    "RcppProgress",
    "rlang",
    "RSpectra",
    "S7",
    "scales",
    "sitmo",
    "sys",
    "tibble",
    "tidyselect",
    "utf8",
    "uwot",
    "vctrs",
    "viridisLite",
    "withr",
    "yaml"
  ],
  "_vignettes": [
    {
      "source": "abstract-cleaning.qmd",
      "filename": "abstract-cleaning.html",
      "title": "abstract-cleaning",
      "engine": "quarto::html",
      "headings": [],
      "created": "2026-03-15 10:43:08",
      "modified": "2026-03-15 10:43:08",
      "commits": 1
    },
    {
      "source": "backend-architecture.qmd",
      "filename": "backend-architecture.html",
      "title": "backend-architecture",
      "engine": "quarto::html",
      "headings": [],
      "created": "2026-03-15 10:43:08",
      "modified": "2026-04-01 14:21:50",
      "commits": 3
    },
    {
      "source": "openai-batch-async.qmd",
      "filename": "openai-batch-async.html",
      "title": "openai-batch-async",
      "engine": "quarto::html",
      "headings": [],
      "created": "2026-03-17 12:38:12",
      "modified": "2026-04-01 14:21:50",
      "commits": 2
    },
    {
      "source": "package-overview.qmd",
      "filename": "package-overview.html",
      "title": "package-overview",
      "engine": "quarto::html",
      "headings": [],
      "created": "2026-03-15 10:43:08",
      "modified": "2026-04-01 14:21:50",
      "commits": 6
    },
    {
      "source": "simplestart.qmd",
      "filename": "simplestart.html",
      "title": "simplestart",
      "engine": "quarto::html",
      "headings": [],
      "created": "2026-01-12 13:28:20",
      "modified": "2026-04-01 14:21:50",
      "commits": 6
    },
    {
      "source": "specter2-setup.qmd",
      "filename": "specter2-setup.html",
      "title": "specter2-setup",
      "engine": "quarto::html",
      "headings": [],
      "created": "2026-06-03 13:46:51",
      "modified": "2026-06-03 13:46:51",
      "commits": 1
    },
    {
      "source": "tei-server-operations.qmd",
      "filename": "tei-server-operations.html",
      "title": "tei-server-operations",
      "engine": "quarto::html",
      "headings": [],
      "created": "2026-03-15 10:43:08",
      "modified": "2026-04-01 14:21:50",
      "commits": 2
    }
  ],
  "_score": 4.447158031342219,
  "_indexed": true,
  "_nocasepkg": "openalexvectorcomp",
  "_universes": [
    "openalexpro",
    "rkrug"
  ],
  "_previous": "0.3.2",
  "_binaries": [
    {
      "r": "4.7.0",
      "os": "linux",
      "version": "0.3.3",
      "date": "2026-06-03T18:34:39.000Z",
      "distro": "noble",
      "commit": "e2eb96f95ddd6623047bf929ed540bb770bcac03",
      "fileid": "31ee9367a395458aad17ff67f590a9106d21990d87140eab65306b1fc84556d0",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/openalexpro/actions/runs/26904863901"
    },
    {
      "r": "4.6.0",
      "os": "linux",
      "version": "0.3.3",
      "date": "2026-06-03T18:34:55.000Z",
      "distro": "noble",
      "commit": "e2eb96f95ddd6623047bf929ed540bb770bcac03",
      "fileid": "34cfb9a0c89d66359be9b2f16e4ffe32cc0c74abafbeb6bba959fe9235468170",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/openalexpro/actions/runs/26904863901"
    },
    {
      "r": "4.5.3",
      "os": "mac",
      "version": "0.3.3",
      "date": "2026-06-03T18:45:16.000Z",
      "commit": "e2eb96f95ddd6623047bf929ed540bb770bcac03",
      "fileid": "a964ef470efdb11cf78a4a8f412f5e652d0940ab2e9185970cd915271986e7dd",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/openalexpro/actions/runs/26904863901"
    },
    {
      "r": "4.6.0",
      "os": "mac",
      "version": "0.3.3",
      "date": "2026-06-03T18:41:33.000Z",
      "commit": "e2eb96f95ddd6623047bf929ed540bb770bcac03",
      "fileid": "1d1a88192ad77cab6a9e0495c981bed637ee8fb900d9a40d4cd12b62f408ee62",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/openalexpro/actions/runs/26904863901"
    },
    {
      "r": "4.6.0",
      "os": "wasm",
      "version": "0.3.3",
      "date": "2026-06-03T18:35:01.000Z",
      "commit": "e2eb96f95ddd6623047bf929ed540bb770bcac03",
      "fileid": "5ed6fcd462875a489aaac0e053edd9fb23a9d6b4aacc4366bbd47d5faf2b8f28",
      "status": "success",
      "buildurl": "https://github.com/r-universe/openalexpro/actions/runs/26904863901"
    },
    {
      "r": "4.7.0",
      "os": "win",
      "version": "0.3.3",
      "date": "2026-06-03T18:34:27.000Z",
      "commit": "e2eb96f95ddd6623047bf929ed540bb770bcac03",
      "fileid": "4a1081d14f6e013259a99b3797f4f36869bd2681686e3c7a599ae42b8fe24a0a",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/openalexpro/actions/runs/26904863901"
    },
    {
      "r": "4.5.3",
      "os": "win",
      "version": "0.3.3",
      "date": "2026-06-03T18:34:40.000Z",
      "commit": "e2eb96f95ddd6623047bf929ed540bb770bcac03",
      "fileid": "c912c7a8f8bed5478fceb230c32f7fd77ebca38620680cd1e08bd26149e20a13",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/openalexpro/actions/runs/26904863901"
    },
    {
      "r": "4.6.0",
      "os": "win",
      "version": "0.3.3",
      "date": "2026-06-03T18:34:38.000Z",
      "commit": "e2eb96f95ddd6623047bf929ed540bb770bcac03",
      "fileid": "6e4d465e36f365144420733458de7399459cdcfeb3752cf7798e5955f7148a78",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/openalexpro/actions/runs/26904863901"
    }
  ]
}