{ "name": "kham_pg", "abstract": "Thai word-segmentation FTS parser — tsvector, soundex, RTGS romanization, NER", "description": "kham_pg is a PostgreSQL text-search parser for the Thai language. Thai has no spaces between words, so standard PostgreSQL parsers produce incorrect token boundaries. kham_pg uses the kham newmm segmentation engine to split Thai text correctly, then expands each token into up to three lexemes at the same tsvector position: the normalised word, its lk82 Thai Soundex code (phonetic-fuzzy search), and its RTGS romanization (Latin-script search). Named entities (persons, places, organisations) are tagged automatically. Supports to_tsvector, plainto_tsquery, ts_rank, ts_headline, and GIN/GiST indexes. Tested on PostgreSQL 14–18.", "tags": ["thai", "full-text-search", "nlp", "tokenizer", "soundex", "romanization", "named-entity", "parser"], "version": "0.8.2", "maintainer": [ "Preedee Ponchevin" ], "license": "mit", "prereqs": { "runtime": { "requires": { "PostgreSQL": "0" } } }, "provides": { "kham_pg": { "abstract": "Thai word-segmentation FTS parser — tsvector, soundex, RTGS romanization, NER", "file": "sql/kham_pg--0.8.2.sql", "version": "0.8.2" } }, "resources": { "homepage": "https://github.com/preedep/kham", "bugtracker": { "web": "https://github.com/preedep/kham/issues" }, "repository": { "url": "https://github.com/preedep/kham.git", "web": "https://github.com/preedep/kham", "type": "git" } }, "generated_by": "Preedee Ponchevin", "meta-spec": { "version": "1.0.0", "url": "http://pgxn.org/meta/spec.txt" } }