Great!
Raymond Fitzgerald, August 01, 2003 - 11:03 am UTC
Now I need to change a couple of things in my code.
Here was my first draft which works pretty good:
CREATE OR REPLACE function StrTranslate (token in varchar2 default null)
return varchar2
as
TYPE v_bin_var1 IS TABLE OF varchar2(2000) INDEX BY BINARY_INTEGER;
v_value_string v_bin_var1;
build_string varchar2(4000) := NULL;
v_count number;
token_temp varchar2(2000);
inPhrase number := 0;
len number := 0;
temp_token varchar2(4000);
char varchar2(1);
startPhrase number :=0;
BEGIN
if token is null then
build_string := null;
else
-- get tokens (white spaces)
if instr(token,' ')>0 then
-- populate the array
token_temp := rtrim(ltrim(upper(token)));
v_count := 1;
while instr(token_temp,' ')>0 loop
v_value_string(v_count) := substr(token_temp,1,instr(token_temp,' ')-1);
v_count := v_count + 1;
token_temp := substr(token_temp,instr(token_temp,' ')+1);
token_temp :=ltrim(token_temp);
end loop;
-- get the last one.
if substr(token_temp,length(token_temp))='\' or
substr(token_temp,length(token_temp))='?' or
substr(token_temp,length(token_temp))=';' then
token_temp := substr(token_temp,1,length(token_temp)-1)||'\'||substr(token_temp,length(token_temp));
end if;
v_value_string(v_count):=token_temp;
else
-- only one element in array
token_temp := token;
if substr(token_temp,length(token_temp))='\' or
substr(token_temp,length(token_temp))='?' or
substr(token_temp,length(token_temp))=';' then
token_temp := substr(token_temp,1,length(token_temp)-1)||'\'||substr(token_temp,length(token_temp));
end if;
v_value_string(1) := upper(token_temp);
end if;
-- string parsed
v_count := 0;
for i in v_value_string.first..v_value_string.last loop
v_count := v_count + 1;
temp_token := NULL;
len := length(v_value_string(i));
-- we iterate over the string to find special web operators
if instr(v_value_string(i),'"')>0 then
for k in 1..len loop
char := substr(v_value_string(i),k,1);
if (char = '"') then
if(inPhrase = 0) then
inPhrase := 1;
startPhrase := 1;
temp_token := temp_token||'{';
else
inPhrase := 0;
temp_token := temp_token||'}';
end if;
else
temp_token := temp_token||char;
end if;
end loop;
if build_string is not null then
if (startPhrase=1) then
-- in a phrase but done
startPhrase := 0;
if (v_value_string(i-1) = 'AND' or
v_value_string(i-1) = '&' or
v_value_string(i-1) = 'OR' or
v_value_string(i-1) = '|' or
v_value_string(i-1) = 'NOT' or
v_value_string(i-1) = '~' or
v_value_string(i-1) = 'NEAR' or
v_value_string(i-1) = ';' or
v_value_string(i-1) = 'ACCUM') then
build_string := build_string||' '||temp_token;
else
build_string := build_string||' AND '||temp_token;
end if;
else
build_string := build_string||' '||temp_token;
end if;
else
startPhrase := 0;
build_string := temp_token;
end if;
else
if (inPhrase=0) then
-- not in a phrase
if v_value_string(i) = 'AND' or
v_value_string(i) = '&' or
v_value_string(i) = 'OR' or
v_value_string(i) = '|' or
v_value_string(i) = 'NOT' or
v_value_string(i) = '~' or
v_value_string(i) = 'NEAR' or
v_value_string(i) = ';' or
v_value_string(i) = 'ACCUM' then
build_string := build_string||' '||v_value_string(i);
elsif (v_count <> 1) then
if (v_value_string(i-1) = 'AND' or
v_value_string(i-1) = '&' or
v_value_string(i-1) = 'OR' or
v_value_string(i-1) = '|' or
v_value_string(i-1) = 'NOT' or
v_value_string(i-1) = '~' or
v_value_string(i-1) = 'NEAR' or
v_value_string(i-1) = ';' or
v_value_string(i-1) = 'ACCUM') then
build_string := build_string||' '||v_value_string(i);
elsif (v_value_string(i) = 'ABOUT') then
build_string := build_string||' AND {'||v_value_string(i)||'}';
else
build_string := build_string||' AND '||v_value_string(i);
end if;
else
if v_value_string(i) = 'ABOUT' then
build_string := '{'||v_value_string(i)||'}';
else
build_string := v_value_string(i);
end if;
end if;
else
-- in a Phrase
build_string := build_string||' '||v_value_string(i);
end if;
end if;
end loop;
if inPhrase=1 then
-- missing end of phrase
build_string := build_string||'}';
end if;
build_string := ltrim(build_string);
if substr(build_string,1,5) = 'NEAR ' then
build_string := '{'||build_string||'}';
end if;
end if;
if substr(build_string,length(build_string)-1)='{}' then
build_string := substr(build_string,1,length(build_string)-2);
end if;
return(build_string);
end StrTranslate;
/
How to parse parenthesis
taro, February 17, 2009 - 9:28 am UTC
Hi
I want to enable users to input complex search terms and therefore would like to keep 'OR' and parenthesis, if they are present in the arugument. So far I have modified the function provided here to accomplish this requiremet as below, but couldn't get to support the nested parentheses. Could you give me some idea/instruction how I can implement this functionality?
Thanks,
create or replace
function parse_search_string(
token IN VARCHAR2 DEFAULT NULL)
RETURN VARCHAR2
AS
TYPE v_bin_var1
IS
TABLE OF VARCHAR2(2000) INDEX BY BINARY_INTEGER;
v_value_string v_bin_var1;
build_string VARCHAR2(4000) := NULL;
v_count NUMBER;
token_temp VARCHAR2(2000);
inPhrase NUMBER := 0;
LEN NUMBER := 0;
temp_str VARCHAR2(4000);
cur_char VARCHAR2(1);
startPhrase NUMBER :=0;
BEGIN
IF NVL(LENGTH(token),0) = 0 THEN
RETURN NULL;
END IF;
-- ignore '{' and '}' passed in
token_temp := REPLACE( REPLACE(token, '{',''), '}','');
-- get tokens (white spaces) and store in the array
IF instr(token_temp,' ')>0 THEN
token_temp := trim(upper(token));
v_count := 1;
WHILE NVL(LENGTH(token_temp),0) > 0 AND instr(token_temp,' ')>0
LOOP
temp_str := trim(SUBSTR(token_temp,1,instr(token_temp,' ')-1));
----- search for a quote in pair ----
IF instr(temp_str, '"') > 0 AND regexp_instr(token_temp, '"[^"]*"') > 0 THEN
v_value_string(v_count) := trim(REPLACE (regexp_substr(token_temp, '"[^"]*"', 1,1), '"', ''));
v_count := v_count + 1;
token_temp := regexp_replace(token_temp, '"[^"]*"','',1,1);
---- search for a valid parenthesis ----
elsif instr(temp_str, '(') > 0
-- matching parenthesis found
AND regexp_instr(token_temp, '\([^\(\)]*\)',1,1) > 0
THEN
-- store the result in the array
v_value_string(v_count) := trim(regexp_substr(token_temp, '\([^\(\)]*\)', 1, 1));
v_count := v_count + 1;
-- remove parsed string
token_temp := regexp_replace(token_temp, '\([^\(\)]*\)', '', 1, 1);
---- nomal token ----
ELSE
-- no garbage ---
IF SUBSTR(temp_str,LENGTH(temp_str))<>'('
AND SUBSTR(temp_str,LENGTH(temp_str))<>')'
AND SUBSTR(temp_str,LENGTH(temp_str))<>'"'
THEN
v_value_string(v_count) := temp_str;
v_count := v_count + 1;
END IF;
token_temp := SUBSTR(token_temp,instr(token_temp,' ') +1);
END IF;
token_temp := trim(token_temp);
END LOOP;
-- get the last one.
IF SUBSTR(token_temp,LENGTH(token_temp))='\' OR SUBSTR(token_temp,LENGTH(token_temp))='?' OR SUBSTR(token_temp,LENGTH(token_temp))=';' THEN
token_temp := SUBSTR(token_temp,1,LENGTH(token_temp)-1)||'\'||SUBSTR(token_temp,LENGTH(token_temp));
END IF;
-- remove garbage ---
IF NVL(LENGTH(token_temp),0) > 0
AND LENGTH(token_temp) > 1
AND SUBSTR(token_temp,1)<>'('
AND SUBSTR(token_temp,1)<>')'
AND SUBSTR(token_temp,1)<>'"'
THEN
v_value_string(v_count) := token_temp;
END IF;
ELSE
-- only one element in array
token_temp := token;
IF SUBSTR(token_temp,LENGTH(token_temp))='\' OR SUBSTR(token_temp,LENGTH(token_temp))='?' OR SUBSTR(token_temp,LENGTH(token_temp))=';' THEN
token_temp := SUBSTR(token_temp,1,LENGTH(token_temp)-1)||'\'||SUBSTR(token_temp,LENGTH(token_temp));
END IF;
v_value_string(1) := upper(token_temp);
END IF;
-- string parsed
--- now build the search string ---
v_count := 0;
build_string := '';
FOR i IN v_value_string.first..v_value_string.last
LOOP
v_count := v_count + 1;
IF SUBSTR(v_value_string(i), 1, 1) = '('
AND SUBSTR(v_value_string(i), -1, 1) = ')' THEN
token_temp := parse_search_string(SUBSTR(v_value_string(i), 2, LENGTH(v_value_string(i)) - 2));
IF v_count = 1
OR v_value_string(i-1) = 'AND'
OR v_value_string(i-1) = '&'
OR v_value_string(i-1) = 'OR'
OR v_value_string(i-1) = '|'
OR v_value_string(i-1) = 'NOT'
OR v_value_string(i-1) = '~'
THEN
build_string := build_string || ' (' || token_temp || ')';
ELSE
build_string := build_string || ' AND (' || token_temp || ')';
END IF;
ELSIF v_value_string(i) = 'AND'
OR v_value_string(i) = '&'
OR v_value_string(i) = 'OR'
OR v_value_string(i) = '|'
OR v_value_string(i) = 'NOT'
OR v_value_string(i) = '~'
-- OR v_value_string(i) = 'NEAR'
-- OR v_value_string(i) = ';'
-- OR v_value_string(i) = 'ACCUM'
THEN
build_string := build_string||' '||v_value_string(i);
ELSIF v_count = 1
OR v_value_string(i-1) = 'AND'
OR v_value_string(i-1) = '&'
OR v_value_string(i-1) = 'OR'
OR v_value_string(i-1) = '|'
OR v_value_string(i-1) = 'NOT'
OR v_value_string(i-1) = '~'
-- OR v_value_string(i-1) = 'NEAR'
-- OR v_value_string(i-1) = ';'
-- OR v_value_string(i-1) = 'ACCUM'
THEN
IF instr(v_value_string(i), '%') > 0
OR instr(v_value_string(i), '_') > 0 THEN
build_string := build_string||' '|| regexp_replace(v_value_string(i),'[\(\)]','');
ELSE
build_string := build_string||' {'|| v_value_string(i) || '}';
END IF;
ELSE
IF instr(v_value_string(i), '%') > 0
OR instr(v_value_string(i), '_') > 0 THEN
build_string := build_string||' AND '|| regexp_replace(v_value_string(i),'[\(\)]','');
ELSE
build_string := build_string||' AND {'||v_value_string(i) || '}';
END IF;
END IF;
END LOOP;
build_string := trim(build_string);
IF SUBSTR(build_string,LENGTH(build_string) -1)='{}' THEN
build_string := SUBSTR(build_string,1,LENGTH(build_string)-2);
END IF;
WHILE regexp_instr(build_string, '^(AND|OR) +') > 0 LOOP
build_string := trim(regexp_replace(build_string, '^(AND|OR) +', '', 1, 1));
END LOOP;
WHILE regexp_instr(build_string, ' +(AND|OR)$') > 0 LOOP
build_string := trim(regexp_replace(build_string, ' +(AND|OR)$', '', 1, 1));
END LOOP;
RETURN(build_string);
END;
February 17, 2009 - 9:43 am UTC
this is an algorithm you'll have to develop... it is what we do for a living isn't it?
You want to build a little parser - go for it.