At the time of writing, Orama supports Chinese (Mandarin) via a custom tokenizer, which is part of the @orama/tokenizers package.
@orama/tokenizers
To get started, make sure to install all the dependencies you need:
npm install @orama/orama @orama/tokenizers
yarn add @orama/orama @orama/tokenizers
pnpm install @orama/orama @orama/tokenizers
If you want to add Mandarin stop-words as well, install the @orama/stopwords package too:
@orama/stopwords
npm install @orama/stopwords
yarn add @orama/stopwords
pnpm install @orama/stopwords
Now you’re ready to get started with Orama:
import { create, insert, search } from "@orama/orama";
import { createTokenizer } from '@orama/tokenizers/mandarin';
import { stopwords as mandarinStopwords } from "@orama/stopwords/mandarin";

// Create a database with a single string field, swapping in the Mandarin
// tokenizer (configured with the Mandarin stop-word list).
const db = create({
  schema: {
    name: "string",
  },
  components: {
    tokenizer: createTokenizer({
      stopWords: mandarinStopwords,
    }),
  },
});

// Index a few city and university names.
insert(db, { name: "北京" }); // Beijing
insert(db, { name: "上海" }); // Shanghai
insert(db, { name: "广州" }); // Guangzhou
insert(db, { name: "深圳" }); // Shenzhen
insert(db, { name: "成都" }); // Chengdu
insert(db, { name: "杭州" }); // Hangzhou
insert(db, { name: "南京" }); // Nanjing
insert(db, { name: "北京大学" }); // Peking University
insert(db, { name: "上海交通大学" }); // Shanghai Jiao Tong University
insert(db, { name: "广州中医药大学" }); // Guangzhou University of Chinese Medicine

// Search for "广州" (Guangzhou).
const results = search(db, {
  term: "广州",
  threshold: 0,
});

console.log(results);

// Example output (ids, scores and timings may differ on your machine):
//
// {
//   "elapsed": {
//     "raw": 89554625,
//     "formatted": "89ms"
//   },
//   "hits": [
//     {
//       "id": "36666208-3",
//       "score": 4.210224897276653,
//       "document": {
//         "name": "广州"
//       }
//     },
//     {
//       "id": "36666208-10",
//       "score": 1.9335268122510698,
//       "document": {
//         "name": "广州中医药大学"
//       }
//     }
//   ],
//   "count": 2
// }