Function to remove accents in postgreSQL

chamini2 picture chamini2 · Nov 28, 2012 · Viewed 35.7k times · Source

Possible Duplicate:
Does PostgreSQL support “accent insensitive” collations?

I'm trying to remove the accents so when I make a SELECT it ignores them.

Example:

SELECT * FROM table WHERE
table.id ILIKE 'Jose';

It returns:

José
Jose
Jósé
Jóse

or something like that.

I found these functions and they wont work, I'm thinking it may be the fact that I'm using Struts 1.X, please check them out and tell me where I'm wrong or what other function should I use.

FIRST FUNCTION

CREATE OR REPLACE FUNCTION unaccent_string(text) RETURNS text AS $$
DECLARE
    input_string text := $1;
BEGIN

input_string := translate(input_string, 'âãäåāăąÁÂÃÄÅĀĂĄ', 'aaaaaaaaaaaaaaa');
input_string := translate(input_string, 'èééêëēĕėęěĒĔĖĘĚ', 'eeeeeeeeeeeeeee');
input_string := translate(input_string, 'ìíîïìĩīĭÌÍÎÏÌĨĪĬ', 'iiiiiiiiiiiiiiii');
input_string := translate(input_string, 'óôõöōŏőÒÓÔÕÖŌŎŐ', 'ooooooooooooooo');
input_string := translate(input_string, 'ùúûüũūŭůÙÚÛÜŨŪŬŮ', 'uuuuuuuuuuuuuuuu');

return input_string;
END;
$$ LANGUAGE plpgql;

SECOND FUNCTION

CREATE OR REPLACE FUNCTION norm_text_latin(character varying) 
  RETURNS character varying AS 
$BODY$ 
declare 
        p_str    alias for $1; 
        v_str    varchar; 
begin 
        select translate(p_str, 'ÀÁÂÃÄÅ', 'AAAAAA') into v_str; 
        select translate(v_str, 'ÉÈËÊ', 'EEEE') into v_str; 
        select translate(v_str, 'ÌÍÎÏ', 'IIII') into v_str; 
        select translate(v_str, 'ÌÍÎÏ', 'IIII') into v_str; 
        select translate(v_str, 'ÒÓÔÕÖ', 'OOOOO') into v_str; 
        select translate(v_str, 'ÙÚÛÜ', 'UUUU') into v_str; 
        select translate(v_str, 'àáâãäå', 'aaaaaa') into v_str; 
        select translate(v_str, 'èéêë', 'eeee') into v_str; 
        select translate(v_str, 'ìíîï', 'iiii') into v_str; 
        select translate(v_str, 'òóôõö', 'ooooo') into v_str; 
        select translate(v_str, 'ùúûü', 'uuuu') into v_str; 
        select translate(v_str, 'Çç', 'Cc') into v_str; 
        return v_str; 
end;$BODY$ 
  LANGUAGE 'plpgsql' VOLATILE; 

Both functions generate the following error when I run the file in NetBeans:

ERROR: unterminated dollar-quoted string at or near "*the string that starts the function*"

Answer

titanofold picture titanofold · Nov 28, 2012

Use the unaccent module that ships with PostgreSQL.

somedb=# CREATE EXTENSION unaccent;
CREATE EXTENSION
somedb=# SELECT unaccent('Hôtel');
 unaccent
----------
 Hotel

somedb=# SELECT * FROM table WHERE lower(unaccent(table.id)) = lower('Jose');

And speed things up a bit by generating an accentless, lower-cased index:

somedb=# CREATE INDEX CONCURRENTLY ON table (lower(unaccent(id)));