RTFEditorKit is apparently not a complete implementation of the RTF specification, but if your text is generated by RTFEditorKit this should work. I find that it is far faster than Oracle Text when converting text from multiple records on the fly. I was helped in this by
http://bit.ly/1G6ArH and chapter 19 of Tom's Oracle Expert book.
I would be happy for improvements to this as I am not a java programmer.
The following was run on Oracle 9.2.0.8.0
ORA92080>CREATE TABLE test_rtf
2 AS
3 SELECT 1 AS id,
4 '{\rtf1\ansi\ansicpg1252\deff0\deflang1033'||
5 '{\fonttbl{\f0\fnil\fcharset0 MS Sans Serif;}'||
6 '{\f1\fswiss\fprq2\fcharset0 System;}}\viewkind4'||
7 '\uc1\pard\f0\fs20 This is a sample rec'||
8 'eiving comment\b\f1\fs20\par }' AS rtf_text
9 FROM DUAL
10 UNION ALL
11 SELECT 2,
12 '{\rtf1\ansi\ansicpg1252\deff0{\fonttbl{\f0\fnil\'||
13 'fcharset0 Arial;}}'||CHR(13)||CHR(10)||
14 '\viewkind4\uc1\pard\lang1033\fs17 this is a test'||
15 CHR(13)||CHR(10)||'\par }'
16 FROM DUAL
17 UNION ALL
18 SELECT 3,
19 '{\rtf1\ansi\deff0{\fonttbl{\f0\fnil\fcharset0 Courier New;}}'||
20 '{\colortbl ;\red0\green0\blue255;}'||
21 '{\*\generator Msftedit'||
22 '5.41.15.1507;}\viewkind4\uc1\pard\lang1033\ul\b\i\f0\fs20 This is a test.\par'||
23 '\cf1\ulnone\i0 This is a test.\cf0\b0\par'||
24 '\par'||
25 '\par'||
26 '\par'||
27 '\par'||
28 '}'
29 FROM DUAL
30 UNION ALL
31 SELECT 4, NULL
32 FROM DUAL;
Table created.
ORA92080>CREATE OR REPLACE PACKAGE rtf
2 IS
3 PROCEDURE convertToText ( rtf_in IN VARCHAR2, plain_text OUT VARCHAR2 )
4 IS
5 language java
6 name 'rtf.convertToText( java.lang.String, java.lang.String[] )';
7
8 FUNCTION to_text ( rtf_in IN VARCHAR2 )
9 RETURN VARCHAR2
10 IS
11 language java
12 name 'rtf.to_text( java.lang.String ) return java.lang.String';
13
14 END rtf;
15 /
Package created.
ORA92080> CREATE OR REPLACE AND COMPILE
2 JAVA SOURCE NAMED "rtf"
3 AS
4 import javax.swing.text.BadLocationException;
5 import javax.swing.text.Document;
6 import javax.swing.text.rtf.RTFEditorKit;
7 import java.io.*;
8
9 public class rtf extends Object
10 {
11 public static void convertToText( java.lang.String p_in,
12 java.lang.String[] p_out )
13 throws IOException, BadLocationException
14 {
15 // test for null inputs to avoid java.lang.NullPointerException when input is null
16 if ( p_in != null )
17 { RTFEditorKit kit = new RTFEditorKit();
18 Document doc = kit.createDefaultDocument();
19 kit.read(new StringReader(p_in), doc, 0);
20 p_out[0] = doc.getText(0, doc.getLength());
21 }
22 else p_out[0] = null;
23 }
24
25 public static String to_text(String p_in)
26 throws IOException, BadLocationException
27 {
28 // test for null inputs to avoid java.lang.NullPointerException when input is null
29 if (p_in != null)
30 { RTFEditorKit kit = new RTFEditorKit();
31 Document doc = kit.createDefaultDocument();
32 kit.read(new StringReader(p_in), doc, 0);
33 return doc.getText(0, doc.getLength());
34 }
35 else return null;
36 }
37 }
38 /
Java created.
ORA92080>SET LINESIZE 90
ORA92080>COLUMN id FORMAT 90
ORA92080>COLUMN rtf_text FORMAT A59 WORD_WRAP
ORA92080>COLUMN plain_text FORMAT A20 WORD_WRAP
ORA92080>SELECT id
2 , rtf_text
3 , rtf.to_text( rtf_text ) AS plain_text
4 FROM test_rtf;
ID RTF_TEXT PLAIN_TEXT
--- ----------------------------------------------------------- --------------------
1 {\rtf1\ansi\ansicpg1252\deff0\deflang1033{\fonttbl{\f0\fnil This is a sample
\fcharset0 MS Sans Serif;}{\f1\fswiss\fprq2\fcharset0 receiving comment
System;}}\viewkind4\uc1\pard\f0\fs20 This is a sample
receiving comment\b\f1\fs20\par }
2 {\rtf1\ansi\ansicpg1252\deff0{\fonttbl{\f0\fnil\fcharset0 this is a test
Arial;}}
\viewkind4\uc1\pard\lang1033\fs17 this is a test
\par }
3 {\rtf1\ansi\deff0{\fonttbl{\f0\fnil\fcharset0 Courier This is a test.
New;}}{\colortbl ;\red0\green0\blue255;}{\*\generator This is a test.
Msftedit5.41.15.1507;}\viewkind4\uc1\pard\lang1033\ul\b\i\f
0\fs20 This is a test.\par\cf1\ulnone\i0 This is a
test.\cf0\b0\par\par\par\par\par}
4
4 rows selected.
Elapsed: 00:00:00.01
ORA92080>DECLARE
2 l_text_out VARCHAR2(4000);
3 BEGIN
4 rtf.convertToText
5 (
6 '{\rtf1\ansi\ansicpg1252\deff0\deflang1033{\fonttbl'||
7 '{\f0\fnil\fcharset0 MS Sans Serif;}'||
8 '{\f1\fswiss\fprq2\fcharset0 System;}}'||
9 '\viewkind4\uc1\pard\f0\fs20 This is a sample rec'||
10 'eiving comment\b\f1\fs20\par }'
11 , l_text_out
12 );
13
14 DBMS_OUTPUT.PUT_LINE( l_text_out );
15
16 END;
17 /
This is a sample receiving comment
PL/SQL procedure successfully completed.