安卓调用sqlite数据库,android自定义组合控件

sqlite3使用fts3虚拟表支持全文搜索，默认支持simple和porter两种分词器，并提供接口自定义分词器。这里用mmseg构建自定义中文分词器。

sqlite在fts3_tokenizer.h中提供了各种用于用户定制分词器的接口，但不提供用于注册用户定制分词器的c函数。分词器的注册必须使用sql语句进行。

SELECT fts3_tokenizer(<tokenizer-name>, <sqlite3_tokenizer_module ptr>);

其中 tokenizer-name 是分词器的名称，sqlite3_tokenizer_module ptr 是一个指向 sqlite3_tokenizer_module 结构的指针，以 SQL blob 的形式传入。以下是官方提供的注册函数。

int registerTokenizer (

sqlite3 *db数据库，

char *zName，

const sqlite3 _ tokenizer _ module * p

() )。

intrc；

sqlite3_stmt*pStmt；

const char * zsql=' select FTS3_ tokenizer？ () )；

RC=SQLite3_prepare_v2(db，zSql，-1，pStmt，0 )；

if(RC！=SQLITE_OK ) {

返回RC；

}

SQLite3_bind_text(pstmt，1，zName，-1，SQLITE_STATIC )；

SQLite3_bind_blob(pstmt，2，p，sizeof(p )，SQLITE_STATIC )；

sqlite3_step(pstmt；

returnSQLite3_finalize(pstmt；

}

实现定制分词器最重要的是得到指向sqlite3_tokenizer_module结构的指针。 sqlite3_tokenizer_module结构定义如下：

struct sqlite3 _ tokenizer _ module {

int iVersion； //版本号，需要设定为0

int(*xcreate )//创建虚拟表时自动调用并创建分词器

intargc，const char*const*argv，sqlite3_tokenizer**ppTokenizer；

int(xdestroy ) ) sqlite3 _ tokenizer (p tokenizer )； //数据库连接关闭时自动调用并销毁资源

插入或搜索int(*xopen )//数据时自动调用并分开写

sqlite3_tokenizer*pTokenizer，const char*pInput，intnBytes，sqlite3 _ tokenizer _ cursor * * pcursor )；

int(xclose ) ) sqlite3 _ tokenizer _ cursor * pcursor )； //分词结果提取结束后自动调用

int(*xnext ) )//将分词结果逐一提取

sqlite3 _ tokenizer _ cursor * pcursor、const char**ppToken、int *pnBytes，

int*piStartOffset，int*piEndOffset，int * pi位置；

(；

有几点需要注意：

1 分词器使用sql语句注册，意味着每次建立sqlite连接都必须重新注册分词器；对于需要使用词典的中文分词器来说，这也意味着巨大的内存消耗。

2 检索时，分词结果的提取和查询表达式的解析是交替进行的。例如，检索 'kanif OR sqlite' 时，引擎首先将整个查询串传递给分词器，调用一次 xNext 获取词 kanif，然后再将词 sqlite 传递给分词器，如此直到全部解析完成。

3 由于汉语分词本身的特殊性，例如“北京市”很可能被认为是一个完整的词，搜索“北京”时就没有结果。如果分词器支持将“北京市”拆分为“北京市”和“北京”，或者支持将“11月”拆分为“11月”和“11”，则需要留意 xNext 函数返回的 piStartOffset 和 piEndOffset：经过测试，插入数据时这两个参数不起作用，但在查询时，这两个参数决定了接下来的输入串。

附件：

#include <stdio.h>
#include <string.h>

#include "fts3_tokenizer.h"
#include "mmseg/mmseg.cpp"

/* True until the mmseg dictionaries have been loaded; initmmseg() tests
 * this flag and clears it after loading. */
static bool loadDic = true;

typedef struct cus_tokenizer{

sql

ite3_tokenizer base;

} cus_tokenizer;

/*
 * Per-input cursor for the custom tokenizer. The other functions in this
 * file cast between this type and sqlite3_tokenizer_cursor*, so `base`
 * stays the first member.
 */
typedef struct cus_tokenizer_cursor cus_tokenizer_cursor;

struct cus_tokenizer_cursor {
    sqlite3_tokenizer_cursor base;  /* header shared with SQLite */
    char *pInput;                   /* set to NULL by cusOpen(), freed by cusClose() */
    int nBytes;                     /* byte length of the input given to cusOpen() */
    int iToken;                     /* index reported as the next token's position */
    char *pToken;                   /* copy of the most recently returned token */
    rmmseg::Algorithm *pAlgor;      /* handle created by mmseg_algor_create() */
};

/*
 * Load the mmseg dictionaries "chars.dic" and "words.dic" the first time
 * this is called; later calls return immediately because loadDic has been
 * cleared.
 *
 * NOTE(review): whatever mmseg_load_words() reports on failure is not
 * checked here.
 */
void initmmseg(void)
{
    if (!loadDic) {
        return;
    }

    mmseg_load_words("chars.dic");
    mmseg_load_words("words.dic");

    loadDic = false;
}

static int cusCreate(

int argc, const char * const *argv,

sqlite3_tokenizer **ppTokenizer

){

cus_tokenizer *t;

t = (cus_tokenizer *) sqlite3_malloc(sizeof(*t));

if( t==NULL ) return SQLITE_NOMEM;

memset(t, 0, sizeof(*t));

initmmseg();

*ppTokenizer = &t->base;

return SQLITE_OK;

}

/*
 * xDestroy callback: release a tokenizer allocated by cusCreate().
 * Always returns SQLITE_OK.
 */
static int cusDestroy(sqlite3_tokenizer *pTokenizer)
{
    sqlite3_free(pTokenizer);
    return SQLITE_OK;
}

static int cusOpen(

sqlite3_tokenizer*pTokenizer, const char *pInput, intnBytes, sqlite3_tokenizer_cursor**ppCursor ){

cus_tokenizer_cursor *c;

if(pInput == 0){

nBytes =0;

}else if(nBytes < 0)

nBytes = (int)strlen(pInput);

c = (cus_tokenizer_cursor *)sqlite3_malloc(sizeof(*c));

if(c == NULL)

return SQLITE_NOMEM;

c->iToken =c->nBytes = 0;

c->pInput = c->pToken =NULL;

c->pAlgor = mmseg_algor_create(pInput,nBytes);

c->nBytes = nBytes;

*ppCursor = &c->base;

return SQLITE_OK;

}

/*
 * xClose callback: release everything owned by a cursor created by
 * cusOpen(), then the cursor itself. Always returns SQLITE_OK.
 */
static int cusClose(sqlite3_tokenizer_cursor *pCursor)
{
    cus_tokenizer_cursor *c = (cus_tokenizer_cursor *)pCursor;

    /* sqlite3_free() accepts NULL, so no guards are needed here. */
    sqlite3_free(c->pInput);
    sqlite3_free(c->pToken);

    /* Guard kept: whether mmseg_algor_destroy() tolerates NULL is not
     * visible from this file. */
    if (c->pAlgor != NULL) {
        mmseg_algor_destroy(c->pAlgor);
    }

    sqlite3_free(c);
    return SQLITE_OK;
}

/*
 * xNext callback: fetch the next token from mmseg.
 *
 * On success the outputs describe the token: *ppToken / *pnBytes give a
 * NUL-terminated copy owned by the cursor (released on the next call or in
 * cusClose()), *piStartOffset / *piEndOffset are taken from the mmseg
 * token's offset and length, and *piPosition is a counter that starts at 0
 * and increases by one per token.
 *
 * Returns SQLITE_OK when a token was produced, SQLITE_DONE when mmseg
 * returns a zero-length token, or SQLITE_NOMEM if the copy cannot be
 * allocated.
 */
static int cusNext(
    sqlite3_tokenizer_cursor *pCursor,
    const char **ppToken, int *pnBytes,
    int *piStartOffset, int *piEndOffset, int *piPosition)
{
    cus_tokenizer_cursor *c = (cus_tokenizer_cursor *)pCursor;

    /* Drop the copy handed out by the previous call. */
    sqlite3_free(c->pToken);
    c->pToken = NULL;

    struct Token token = mmseg_next_token(c->pAlgor);
    if (token.length == 0) {
        /* Original author's note: generally this point is reached only
         * while inserting data. */
        return SQLITE_DONE;
    }

    int len = token.length;

    c->pToken = (char *)sqlite3_malloc(len + 1);
    if (c->pToken == NULL) {
        return SQLITE_NOMEM;
    }
    memcpy(c->pToken, token.text, len);
    c->pToken[len] = 0;

    *ppToken = c->pToken;
    *pnBytes = len;
    *piStartOffset = token.offset;
    *piEndOffset = token.offset + token.length;
    *piPosition = c->iToken++;

    return SQLITE_OK;
}

/*
 * Callback table for the custom mmseg tokenizer. The initializers are
 * positional, so the leading 0 is required to fill iVersion; without it
 * every callback would land in the wrong slot.
 */
static const sqlite3_tokenizer_module cusTokenizerModule = {
    0,           /* iVersion */
    cusCreate,   /* xCreate  */
    cusDestroy,  /* xDestroy */
    cusOpen,     /* xOpen    */
    cusClose,    /* xClose   */
    cusNext,     /* xNext    */
};

/*
 * Register the tokenizer module `p` under the name `zName` on the
 * connection `db` by executing "SELECT fts3_tokenizer(?, ?)".
 *
 * Returns the sqlite3_prepare_v2() error code if preparing fails;
 * otherwise returns the result of sqlite3_finalize(). The return value of
 * sqlite3_step() is not inspected separately.
 *
 * NOTE(review): recent SQLite releases reject the two-argument form of
 * fts3_tokenizer() unless it has been enabled with
 * sqlite3_db_config(db, SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER, 1, 0) -
 * confirm against the SQLite version in use.
 */
int registerTokenizer(
    sqlite3 *db,
    char *zName,
    const sqlite3_tokenizer_module *p)
{
    int rc;
    sqlite3_stmt *pStmt;
    const char *zSql = "SELECT fts3_tokenizer(?, ?)";

    rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
    if (rc != SQLITE_OK) {
        return rc;
    }

    sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
    /* The blob is the pointer value itself, hence &p and sizeof(p). */
    sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC);
    sqlite3_step(pStmt);

    return sqlite3_finalize(pStmt);
}

int main(){

constsqlite3_tokenizer_module *ptr =&cusTokenizerModule;

sqlite3*pDB;

sqlite3_stmt* stmt;

char *errMsg = NULL;

const char*zTail;

int rc =sqlite3_open("test.sqlite3", &pDB);

if(rc){

printf("create error. %sn",sqlite3_errmsg(pDB));

return rc;

}

chartoken_name[] = "custoken";

registerTokenizer(pDB, token_name, ptr);

rc =sqlite3_exec(pDB, "CREATE VIRTUAL TABLE foo USINGfts3(tokenize=custoken)", 0, 0, &errMsg); if(rc !=SQLITE_OK){ printf("create virtual error, %sn", errMsg); if(rc !=SQLITE_OK){ printf("create virtual error, %sn", errMsg); return rc; } rc =sqlite3_exec(pDB, "INSERT INTO fooVALUES('xe5x8cx97xe4xbaxacxe5xb8x82')", 0, 0,&errMsg); if(rc !=SQLITE_OK){ printf("insert value error, %sn", errMsg); return rc; } int nrow =0, ncolumn = 0; char**azResult; //二维数组存放结果 sqlite3_get_table(pDB , "SELECT * FROM foo WHERE content MATCH'xe5x8cx97xe4xbaxacxe5xb8x82'" , &azResult, &nrow , &ncolumn ,&errMsg ); int i = 0; printf("row:%d column=%d n" , nrow , ncolumn ); printf("nThe result of querying is : n" ); for( i=0 ;i