views:

577

answers:

2

I'm new to Postgres and SQL. I created the following script that draws a line from a point to a projected point on the nearest line. It works fine on a small data set 5 to 10 points with the same number of lines; however, doing it on 60 points with 2,000 lines, the query takes about 12 hours. it is based on a nearest neighbour function pasted below as well from http://www.bostongis.com/downloads/pgis_nn.txt

EDIT documentation on pgis_fn_nn is available on http://www.bostongis.com/PrinterFriendly.aspx?content_name=postgis_nearest_neighbor_generic

The slow part is the implementation of pgis_fn_nn(...)

  1. What am I doing wrong?
  2. Are there any tips to make this faster ?
  3. Is there a way I can improve on both scripts?
  4. What would you recommend if I want to combine both queries into one?

*my_script.sql*

-- this sql script creates a line table that connects points from a point table
-- to the projected points from the nearest line to the point of oritin 

-- delete duplicate tables if they exist
DROP TABLE exploded_roads;
DROP TABLE projected_points;
DROP TABLE lines_from_centroids_to_roads;

-- create temporary exploaded lines table
CREATE TABLE exploded_roads (
 the_geom geometry,
 edge_id  serial
);

-- insert the linestring that are not multistring
INSERT INTO exploded_roads
SELECT the_geom
FROM "StreetCenterLines"
WHERE st_geometrytype(the_geom) = 'ST_LineString';

-- insert the linestrings that need to be converted from multi string
INSERT INTO exploded_roads
SELECT the_geom
FROM (
    SELECT ST_GeometryN(
 the_geom,
 generate_series(1, ST_NumGeometries(the_geom)))
    AS the_geom 
    FROM "StreetCenterLines"
)
AS foo;

-- create projected points table with ids matching centroid table
CREATE TABLE projected_points (
 the_geom  geometry,
 pid  serial,
 dauid  int
);

-- Populate Table
-- code based on Paul Ramsey's site and Boston GIS' NN code
INSERT INTO projected_points(the_geom, dauid)
SELECT DISTINCT ON ("DAUID"::int)
 ( 
  ST_Line_Interpolate_Point(
   (
    SELECT the_geom
    FROM exploded_roads
    WHERE edge_id IN 
     (
      SELECT nn_gid
      FROM pgis_fn_nn(centroids.the_geom, 30000000, 1,10, 'exploded_roads', 'true', 'edge_id', 'the_geom')
     )
   ),
   ST_Line_Locate_Point(
    exploded_roads.the_geom,
    centroids.the_geom
   )
  )
 ),
 (centroids."DAUID"::int)

FROM exploded_roads, fred_city_o6_da_centroids centroids;


-- Create Line tables
CREATE TABLE lines_from_centroids_to_roads (
 the_geom geometry,
 edge_id SERIAL
);


-- Populate Line Table
INSERT INTO lines_from_centroids_to_roads(
SELECT
 ST_MakeLine( centroids.the_geom, projected_points.the_geom )
FROM projected_points, fred_city_o6_da_centroids centroids
WHERE projected_points.dauid = centroids.id
);

*pgis_fn_nn* from http://www.bostongis.com/downloads/pgis_nn.txt

---LAST UPDATED 8/2/2007 --
CREATE OR REPLACE FUNCTION expandoverlap_metric(a geometry, b geometry, maxe double precision, maxslice double precision)
  RETURNS integer AS
$BODY$
BEGIN
    FOR i IN 0..maxslice LOOP
        IF expand(a,maxe*i/maxslice) && b THEN
            RETURN i;
        END IF;
    END LOOP; 
    RETURN 99999999;
END;
$BODY$
LANGUAGE 'plpgsql' IMMUTABLE;

CREATE TYPE pgis_nn AS
   (nn_gid integer, nn_dist numeric(16,5));

CREATE OR REPLACE FUNCTION _pgis_fn_nn(geom1 geometry, distguess double precision, numnn integer, maxslices integer, lookupset varchar(150), swhere varchar(5000), sgid2field varchar(100), sgeom2field varchar(100))
  RETURNS SETOF pgis_nn AS
$BODY$
DECLARE
    strsql text;
    rec pgis_nn;
    ncollected integer;
    it integer;
--NOTE: it: the iteration we are currently at 
--start at the bounding box of the object (expand 0) and move up until it has collected more objects than we need or it = maxslices whichever event happens first
BEGIN
    ncollected := 0; it := 0;
    WHILE ncollected < numnn AND it <= maxslices LOOP
        strsql := 'SELECT currentit.' || sgid2field || ', distance(ref.geom, currentit.' || sgeom2field || ') as dist FROM ' || lookupset || '  as currentit, (SELECT geometry(''' || CAST(geom1 As text) || ''') As geom) As ref WHERE ' || swhere || ' AND distance(ref.geom, currentit.' || sgeom2field || ') <= ' || CAST(distguess As varchar(200)) || ' AND expand(ref.geom, ' || CAST(distguess*it/maxslices As varchar(100)) ||  ') && currentit.' || sgeom2field || ' AND expandoverlap_metric(ref.geom, currentit.' || sgeom2field || ', ' || CAST(distguess As varchar(200)) || ', ' || CAST(maxslices As varchar(200)) || ') = ' || CAST(it As varchar(100)) || ' ORDER BY distance(ref.geom, currentit.' || sgeom2field || ') LIMIT ' || 
        CAST((numnn - ncollected) As varchar(200));
        --RAISE NOTICE 'sql: %', strsql;
        FOR rec in EXECUTE (strsql) LOOP
            IF ncollected < numnn THEN
                ncollected := ncollected + 1;
                RETURN NEXT rec;
            ELSE
                EXIT;
            END IF;
        END LOOP;
        it := it + 1;
    END LOOP;
END
$BODY$
LANGUAGE 'plpgsql' STABLE;

CREATE OR REPLACE FUNCTION pgis_fn_nn(geom1 geometry, distguess double precision, numnn integer, maxslices integer, lookupset varchar(150), swhere varchar(5000), sgid2field varchar(100), sgeom2field varchar(100))
  RETURNS SETOF pgis_nn AS
$BODY$
    SELECT * FROM _pgis_fn_nn($1,$2, $3, $4, $5, $6, $7, $8);
$BODY$
  LANGUAGE 'sql' STABLE;
+2  A: 

I am using "the nearest" function to do pgRouting on OpenStreetMap data. At first I stumbled upon the fn_nn function you mention too, but a visit to the #postgis irc channel on irc.freenode.net helped me out. It turns out postgis has some fantastic linear functions that, when combined, answer everything you need!

You can find more on the linear functions at: http://postgis.refractions.net/documentation/manual-1.3/ch06.html#id2578698 but here is how I implemented it

select 
    line_interpolate_point(ways.the_geom, 
    line_locate_point(ways.the_geom, pnt))),')','')) as anchor_point,
    -- returns the anchor point
    line_locate_point(ways.the_geom, pnt) as anchor_percentage, 
    -- returns the percentage on the line where the anchor will 
    -- touch (number between 0 and 1)
    CASE
    WHEN line_locate_point(ways.the_geom, pnt) < 0.5 THEN ways.source
    WHEN line_locate_point(ways.the_geom, pnt) > 0.5 THEN ways.target
    END as node,
    -- returns the nearest end node id
    length_spheroid( st_line_substring(ways.the_geom,0,
     line_locate_point(ways.the_geom, pnt)),
    'SPHEROID[\"WGS 84\",6378137,298.257223563]' ) as length,
    distance_spheroid(pnt, line_interpolate_point(ways.the_geom, 
    line_locate_point(ways.the_geom, pnt)),
    'SPHEROID[\"WGS 84\",6378137,298.257223563]') as dist
            from ways, planet_osm_line,
            ST_GeomFromText('POINT(1.245 51.234)', 4326) as pnt 
     where ways.gid = planet_osm_line.osm_id
            order by dist asc limit 1;";

Hope this is of any use to you

milovanderlinden
thanks for the answer; however, it seems that the code returns the nearest point on a line, what if you have 2,000 lines, how does it return the nearest line to the point?
dassouki
the function line_locate_point(table.column, real_point) will search through the entire dataset, in my case 58000 line elements. the clause "order by dist asc limit 1" will give me a result with a single line element where the distance is shortest. Get it?
milovanderlinden
A: 

How about writing that slow looping logic in plperl. I have noticed it to run much fasterfor non-database logic. And if you really want to go crazy write your stored procedure in C for that logic and it will scream.

StarShip3000