{
  "_id": "6a15659cacfb0bcc41d61a6a",
  "Package": "NUSS",
  "Title": "Mixed N-Grams and Unigram Sequence Segmentation",
  "Version": "0.1.0",
  "Authors@R": "person(\"Oskar\", \"Kosch\", , \"contact@oskarkosch.com\", role = c(\"aut\", \"cre\"),\ncomment = c(ORCID = \"0000-0003-2697-1393\"))",
  "Description": "Segmentation of short text sequences - like hashtags -\ninto the separated words sequence, done with the use of\ndictionary, which may be built on custom corpus of texts.\nUnigram dictionary is used to find most probable sequence, and\nn-grams approach is used to determine possible segmentation\ngiven the text corpus.",
  "License": "GPL (>= 3)",
  "URL": "https://github.com/theogrost/NUSS",
  "BugReports": "https://github.com/theogrost/NUSS/issues",
  "Config/testthat/edition": "3",
  "Encoding": "UTF-8",
  "Language": "en",
  "LazyData": "true",
  "Roxygen": "list(markdown = TRUE)",
  "RoxygenNote": "7.3.1",
  "Config/pak/sysreqs": "libicu-dev",
  "Repository": "https://theogrost.r-universe.dev",
  "Date/Publication": "2024-07-31 10:40:05 UTC",
  "RemoteUrl": "https://github.com/theogrost/nuss",
  "RemoteRef": "HEAD",
  "RemoteSha": "2e104423fab7ec8a0fe241f285f9107a6c7865f2",
  "NeedsCompilation": "yes",
  "Packaged": {
    "Date": "2026-05-26 09:11:58 UTC",
    "User": "root"
  },
  "Author": "Oskar Kosch [aut, cre] (ORCID: <https://orcid.org/0000-0003-2697-1393>)",
  "Maintainer": "Oskar Kosch <contact@oskarkosch.com>",
  "MD5sum": "35800c912d2a386b306cf24bdb5628c6",
  "_user": "theogrost",
  "_type": "src",
  "_file": "NUSS_0.1.0.tar.gz",
  "_fileid": "4b802c9dc7e1bc23150dd1f87a2fa38cadd46ce74ab6be069d21c05f83e07c9f",
  "_filesize": 430739,
  "_sha256": "4b802c9dc7e1bc23150dd1f87a2fa38cadd46ce74ab6be069d21c05f83e07c9f",
  "_created": "2026-05-26T09:11:58.000Z",
  "_published": "2026-05-26T09:19:24.951Z",
  "_distro": "noble",
  "_jobs": [
    {
      "job": 77843102430,
      "time": 170,
      "config": "linux-devel-arm64",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "7213227618"
    },
    {
      "job": 77843102348,
      "time": 150,
      "config": "linux-devel-x86_64",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "7213220687"
    },
    {
      "job": 77843102412,
      "time": 158,
      "config": "linux-release-arm64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7213223146"
    },
    {
      "job": 77843102453,
      "time": 152,
      "config": "linux-release-x86_64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7213221418"
    },
    {
      "job": 77843102460,
      "time": 161,
      "config": "macos-oldrel-arm64",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "7213215099"
    },
    {
      "job": 77843102436,
      "time": 263,
      "config": "macos-oldrel-x86_64",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "7213253223"
    },
    {
      "job": 77843102340,
      "time": 164,
      "config": "macos-release-arm64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7213213003"
    },
    {
      "job": 77843102414,
      "time": 405,
      "config": "macos-release-x86_64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7213296676"
    },
    {
      "job": 77842556460,
      "time": 196,
      "config": "source",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7213166811"
    },
    {
      "job": 77843102334,
      "time": 128,
      "config": "wasm-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7213212458"
    },
    {
      "job": 77843102506,
      "time": 141,
      "config": "windows-devel",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "7213217380"
    },
    {
      "job": 77843102345,
      "time": 129,
      "config": "windows-oldrel",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "7213212968"
    },
    {
      "job": 77843102387,
      "time": 166,
      "config": "windows-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7213226263"
    }
  ],
  "_buildurl": "https://github.com/r-universe/theogrost/actions/runs/26443150887",
  "_status": "success",
  "_host": "GitHub-Actions",
  "_upstream": "https://github.com/theogrost/nuss",
  "_commit": {
    "id": "2e104423fab7ec8a0fe241f285f9107a6c7865f2",
    "author": "theogrost <35686723+theogrost@users.noreply.github.com>",
    "committer": "theogrost <35686723+theogrost@users.noreply.github.com>",
    "message": "Fixed arguments\n",
    "time": 1722422405
  },
  "_maintainer": {
    "name": "Oskar Kosch",
    "email": "contact@oskarkosch.com",
    "orcid": "0000-0003-2697-1393"
  },
  "_registered": true,
  "_dependencies": [
    {
      "package": "R",
      "version": ">= 3.5",
      "role": "Depends"
    },
    {
      "package": "BH",
      "role": "LinkingTo"
    },
    {
      "package": "Rcpp",
      "role": "LinkingTo"
    },
    {
      "package": "dplyr",
      "role": "Imports"
    },
    {
      "package": "magrittr",
      "role": "Imports"
    },
    {
      "package": "Rcpp",
      "role": "Imports"
    },
    {
      "package": "stringr",
      "role": "Imports"
    },
    {
      "package": "text2vec",
      "role": "Imports"
    },
    {
      "package": "textclean",
      "role": "Imports"
    },
    {
      "package": "utils",
      "role": "Imports"
    },
    {
      "package": "testthat",
      "version": ">= 3.0.0",
      "role": "Suggests"
    }
  ],
  "_owner": "theogrost",
  "_selfowned": true,
  "_usedby": 0,
  "_updates": [],
  "_tags": [],
  "_stars": 0,
  "_contributors": [
    {
      "user": "theogrost",
      "count": 12,
      "uuid": 35686723
    }
  ],
  "_userbio": {
    "uuid": 35686723,
    "type": "user",
    "name": "theogrost"
  },
  "_downloads": {
    "count": 145,
    "source": "https://cranlogs.r-pkg.org/downloads/total/last-month/NUSS"
  },
  "_devurl": "https://github.com/theogrost/nuss",
  "_searchresults": 8,
  "_topics": [
    "cpp"
  ],
  "_rbuild": "4.6.0",
  "_assets": [
    "extra/citation.cff",
    "extra/citation.html",
    "extra/citation.json",
    "extra/citation.txt",
    "extra/contents.json",
    "extra/NUSS.html",
    "extra/readme.html",
    "extra/readme.md",
    "manual.pdf"
  ],
  "_homeurl": "https://github.com/theogrost/nuss",
  "_realowner": "theogrost",
  "_cranurl": true,
  "_releases": [
    {
      "version": "0.1.0",
      "date": "2024-08-19"
    }
  ],
  "_exports": [
    "igrepl",
    "ngrams_dictionary",
    "ngrams_segmentation",
    "nuss",
    "unigram_dictionary",
    "unigram_sequence_segmentation"
  ],
  "_datasets": [
    {
      "name": "base_dictionary",
      "title": "Base dictionary with unigrams",
      "object": "base_dictionary",
      "class": [
        "data.frame"
      ],
      "fields": [
        "to_search",
        "to_replace",
        "id",
        "points"
      ],
      "rows": 62983,
      "table": true,
      "tojson": true
    }
  ],
  "_help": [
    {
      "page": "base_dictionary",
      "title": "Base dictionary with unigrams",
      "topics": [
        "base_dictionary"
      ]
    },
    {
      "page": "igrepl",
      "title": "Perform inverse regex search (C++)",
      "topics": [
        "igrepl"
      ]
    },
    {
      "page": "ngrams_dictionary",
      "title": "Create n-grams dictionary",
      "topics": [
        "ngrams_dictionary"
      ]
    },
    {
      "page": "ngrams_segmentation",
      "title": "Segmenting sequences with n-grams.",
      "topics": [
        "ngrams_segmentation"
      ]
    },
    {
      "page": "nuss",
      "title": "Mixed N-Grams and Unigram Sequence Segmentation (NUSS) function",
      "topics": [
        "nuss"
      ]
    },
    {
      "page": "unigram_dictionary",
      "title": "Create unigram dictionary",
      "topics": [
        "unigram_dictionary"
      ]
    },
    {
      "page": "unigram_sequence_segmentation",
      "title": "Segmenting sequences with unigrams",
      "topics": [
        "unigram_sequence_segmentation"
      ]
    }
  ],
  "_readme": "https://github.com/theogrost/nuss/raw/HEAD/README.md",
  "_rundeps": [
    "BH",
    "cli",
    "cpp11",
    "data.table",
    "digest",
    "dplyr",
    "dtt",
    "english",
    "float",
    "generics",
    "glue",
    "lattice",
    "lexicon",
    "lgr",
    "lifecycle",
    "magrittr",
    "Matrix",
    "MatrixExtra",
    "mgsub",
    "mlapi",
    "NLP",
    "pillar",
    "pkgconfig",
    "purrr",
    "qdapRegex",
    "R6",
    "Rcpp",
    "RcppArmadillo",
    "RhpcBLASctl",
    "rlang",
    "rsparse",
    "slam",
    "stringi",
    "stringr",
    "syuzhet",
    "text2vec",
    "textclean",
    "textshape",
    "tibble",
    "tidyr",
    "tidyselect",
    "utf8",
    "vctrs",
    "withr",
    "zoo"
  ],
  "_sysdeps": [
    {
      "shlib": "libstdc++",
      "package": "libstdc++6",
      "source": "gcc",
      "version": "14.2.0-4ubuntu2~24.04.1",
      "name": "c++",
      "homepage": "http://gcc.gnu.org/",
      "description": "GNU Standard C++ Library v3"
    }
  ],
  "_score": 2.6989700043360187,
  "_indexed": true,
  "_nocasepkg": "nuss",
  "_universes": [
    "theogrost"
  ],
  "_binaries": [
    {
      "r": "4.7.0",
      "os": "linux",
      "version": "0.1.0",
      "date": "2026-05-26T09:14:50.000Z",
      "distro": "noble",
      "arch": "aarch64",
      "commit": "2e104423fab7ec8a0fe241f285f9107a6c7865f2",
      "fileid": "0e7edc82d593d15230f09ba7a759a2847ca26408f17e6e8dd91f1ab19f5ec0c6",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/theogrost/actions/runs/26443150887"
    },
    {
      "r": "4.7.0",
      "os": "linux",
      "version": "0.1.0",
      "date": "2026-05-26T09:14:27.000Z",
      "distro": "noble",
      "arch": "x86_64",
      "commit": "2e104423fab7ec8a0fe241f285f9107a6c7865f2",
      "fileid": "595975ffe5f15b25fef4f7c9c90ca79eaa1ad5b05235e787fc5de1f357b43983",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/theogrost/actions/runs/26443150887"
    },
    {
      "r": "4.6.0",
      "os": "linux",
      "version": "0.1.0",
      "date": "2026-05-26T09:14:36.000Z",
      "distro": "noble",
      "arch": "aarch64",
      "commit": "2e104423fab7ec8a0fe241f285f9107a6c7865f2",
      "fileid": "a6ab7e28bc12ea60abe868c77839203e7f0668c6dca5e8f4dfe88a0e2d93941f",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/theogrost/actions/runs/26443150887"
    },
    {
      "r": "4.6.0",
      "os": "linux",
      "version": "0.1.0",
      "date": "2026-05-26T09:14:29.000Z",
      "distro": "noble",
      "arch": "x86_64",
      "commit": "2e104423fab7ec8a0fe241f285f9107a6c7865f2",
      "fileid": "bf783bbbd04c5b015aeee12eea357d8592bb5cdd79c15741fbd72a3559c2908b",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/theogrost/actions/runs/26443150887"
    },
    {
      "r": "4.5.3",
      "os": "mac",
      "version": "0.1.0",
      "date": "2026-05-26T09:14:16.000Z",
      "arch": "aarch64",
      "commit": "2e104423fab7ec8a0fe241f285f9107a6c7865f2",
      "fileid": "bcb485cec83f65eb31ebc672ab06797857d8af386d339b4d2da5af73814101ee",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/theogrost/actions/runs/26443150887"
    },
    {
      "r": "4.5.3",
      "os": "mac",
      "version": "0.1.0",
      "date": "2026-05-26T09:15:41.000Z",
      "arch": "x86_64",
      "commit": "2e104423fab7ec8a0fe241f285f9107a6c7865f2",
      "fileid": "cd5a962d609979c481e8d7efcb373233b282b985facdce95384ad75ac5985144",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/theogrost/actions/runs/26443150887"
    },
    {
      "r": "4.6.0",
      "os": "mac",
      "version": "0.1.0",
      "date": "2026-05-26T09:14:00.000Z",
      "arch": "aarch64",
      "commit": "2e104423fab7ec8a0fe241f285f9107a6c7865f2",
      "fileid": "df8c21429f0851f7ea3035269efe72bc9dea3f4c1ab098e62585fdf8b28057aa",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/theogrost/actions/runs/26443150887"
    },
    {
      "r": "4.6.0",
      "os": "mac",
      "version": "0.1.0",
      "date": "2026-05-26T09:17:16.000Z",
      "arch": "x86_64",
      "commit": "2e104423fab7ec8a0fe241f285f9107a6c7865f2",
      "fileid": "ea23a67d78ab8ed7ab5874209d64d2cf4a0ced23489a04015914c2a08a648d7c",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/theogrost/actions/runs/26443150887"
    },
    {
      "r": "4.6.0",
      "os": "wasm",
      "version": "0.1.0",
      "date": "2026-05-26T09:14:24.000Z",
      "arch": "emscripten",
      "commit": "2e104423fab7ec8a0fe241f285f9107a6c7865f2",
      "fileid": "a7726bb1aa810928945e35376da570e60a50e98a15c41f651f93b0472fdba3c5",
      "status": "success",
      "buildurl": "https://github.com/r-universe/theogrost/actions/runs/26443150887"
    },
    {
      "r": "4.7.0",
      "os": "win",
      "version": "0.1.0",
      "date": "2026-05-26T09:13:45.000Z",
      "arch": "x86_64",
      "commit": "2e104423fab7ec8a0fe241f285f9107a6c7865f2",
      "fileid": "4906513ec04e01a1a677c211248be6d1721e5193a6f8c323ba60b7fcefccd391",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/theogrost/actions/runs/26443150887"
    },
    {
      "r": "4.5.3",
      "os": "win",
      "version": "0.1.0",
      "date": "2026-05-26T09:13:36.000Z",
      "arch": "x86_64",
      "commit": "2e104423fab7ec8a0fe241f285f9107a6c7865f2",
      "fileid": "5d135b29bb5ff8a4a6ed0b231e712f3f9f195aedb3325c59e9de59c540ca4517",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/theogrost/actions/runs/26443150887"
    },
    {
      "r": "4.6.0",
      "os": "win",
      "version": "0.1.0",
      "date": "2026-05-26T09:14:07.000Z",
      "arch": "x86_64",
      "commit": "2e104423fab7ec8a0fe241f285f9107a6c7865f2",
      "fileid": "59cc524813ab1982dd7560846798655da32d4d43cfc8f41fd58394c99239f9b5",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/theogrost/actions/runs/26443150887"
    }
  ]
}