Not wanted HTML tags in snippet searching multiple columns of a table [message #478884] |
Wed, 13 October 2010 03:27 |
kastania
Messages: 19 Registered: May 2007
|
Junior Member |
|
|
I get not wanted HTML TAGS in my snippet.
Doing various tests I found out that the problem is when I need to search in multiple columns of a table.
That is when I create a user_datastore that uses a procedure that concatenates the columns.
And especially when the data with the html tags is in a VARCHAR2 column.
e.g
[code]--create the table
CREATE TABLE CONTENT_TRANS (content_trans_id NUMBER,
main_text CLOB,
title vARCHAR2(2000),
oracle_text_column VARCHAR2(1));
alter table "CONTENT_TRANS" add constraint CONTENT_PK primary key("CONTENT_TRANS_ID") ;
--Insert dummy data
Insert into CONTENT_TRANS
(CONTENT_TRANS_ID,MAIN_TEXT,TITLE)
values
(1,'lorem','lorem
qualification
2.1 ');
Insert into CONTENT_TRANS
(CONTENT_TRANS_ID,MAIN_TEXT,TITLE)
values
(2,'lorem','lorem
qualification
2.1 ');
--CREATE THE procedure that concatenates main_text(CLOB) and title(VARCHAR2)
CREATE OR REPLACE PROCEDURE CONTENT_TRANS_PROC( p_id in rowid, p_lob IN OUT clob)
IS
BEGIN
FOR c1 IN (SELECT main_text||' '||title data FROM content_trans WHERE ROWID = p_id)
LOOP
dbms_lob.copy( p_lob, c1.data,
dbms_lob.getlength( c1.data ));
END LOOP;
END;
/
--Create the user datastore
BEGIN
ctx_ddl.create_preference( 'content_trans_datastore', 'user_datastore' );
ctx_ddl.set_attribute( 'content_trans_datastore', 'procedure', 'CONTENT_TRANS_PROC' );
END;
/
--Create the index
CREATE INDEX content_trans_ot_idx ON content_trans(ORACLE_TEXT_COLUMN)
INDEXTYPE IS ctxsys.CONTEXT PARAMETERS ('datastore content_trans_datastore SYNC(ON COMMIT) filter ctxsys.null_filter section group ctxsys.html_section_group');
exec ctx_doc.set_key_type('PRIMARY_KEY');
--Perform the query
SELECT SCORE(1),ct.content_trans_id, ctx_doc.snippet('content_trans_ot_idx', ct.content_trans_id, 'lorem') as snippet
from content_trans ct
where contains(ct.ORACLE_TEXT_COLUMN, 'lorem', 1) > 1;
Results WITH NOT WANTED HTML TAGS:
6 1 <b>lorem</b> <b>lorem</b> <p>qualification</p> 2.1
6 2 <b>lorem</b> <b>lorem</b> <br>qualification</br> 2.1
[Updated on: Wed, 13 October 2010 04:14] Report message to a moderator
|
|
|
Re: Not wanted HTML tags in snippet searching multiple columns of a table [message #478930 is a reply to message #478884] |
Wed, 13 October 2010 12:34 |
|
Barbara Boehmer
Messages: 9100 Registered: November 2002 Location: California, USA
|
Senior Member |
|
|
You could create a function to strip out the html tags:
SCOTT@orcl_11gR2> SET DEFINE OFF
SCOTT@orcl_11gR2> CREATE OR REPLACE FUNCTION no_html
2 (p_string IN VARCHAR2)
3 RETURN VARCHAR2
4 AS
5 v_string_in VARCHAR2 (32767) := p_string;
6 v_string_out VARCHAR2 (32767);
7 BEGIN
8 WHILE INSTR (v_string_in, '>') > 0 LOOP
9 v_string_out := v_string_out
10 || SUBSTR (v_string_in, 1, INSTR (v_string_in, '<') - 1);
11 v_string_in := SUBSTR (v_string_in, INSTR (v_string_in, '>') + 4);
12 END LOOP;
13 v_string_out := v_string_out || v_string_in;
14 RETURN v_string_out;
15 END no_html;
16 /
Function created.
SCOTT@orcl_11gR2> SHOW ERRORS
No errors.
SCOTT@orcl_11gR2> COLUMN snippet FORMAT A50 WORD_WRAPPED
SCOTT@orcl_11gR2> SELECT SCORE(1),
2 ct.content_trans_id,
3 no_html
4 (ctx_doc.snippet
5 ('content_trans_ot_idx',
6 ct.content_trans_id,
7 'lorem')) as snippet
8 from content_trans ct
9 where contains (ct.ORACLE_TEXT_COLUMN, 'lorem', 1) > 1
10 /
SCORE(1) CONTENT_TRANS_ID SNIPPET
---------- ---------------- --------------------------------------------------
6 1 <b>lorem</b> <b>lorem</b> qualification 2.1
6 2 <b>lorem</b> <b>lorem</b> qualification 2.1
2 rows selected.
SCOTT@orcl_11gR2>
|
|
|
|