My Books

My Slides

rss 아이콘 이미지


이번 시간에 간단히 살펴볼 내용은 조인 Pruning이 수행되지 않는 문제점을 해결하는 방법입니다.

 

내용은 매우 간단합니다.

 

모든 테이블에 상수 조건을 입력하면 됩니다. (간단하죠?)

 

테스트를 통해 살펴보겠습니다.

 

 

파티션용 테이블 환경 구성

 

P1, P2, 2개의 월별 파티션 테이블 및 필요한 오브젝트를 생성합니다.

 

-- 파티션 테이블 p1, p2 생성 

drop table p1 cascade;
drop table p2 cascade;

create table p1 (
    c1        integer,
    logdate   date,
    dummy     char(10)
);

create table p2 (
    c1        integer,
    logdate   date,
	amount    integer,
    dummy     char(10)
);

create table p1_y201701 (CHECK( logdate >= DATE '2017-01-01' AND logdate < DATE '2017-02-01')) INHERITS (p1);
create table p1_y201702 (CHECK( logdate >= DATE '2017-02-01' AND logdate < DATE '2017-03-01')) INHERITS (p1);
create table p1_y201703 (CHECK( logdate >= DATE '2017-03-01' AND logdate < DATE '2017-04-01')) INHERITS (p1);
create table p1_y201704 (CHECK( logdate >= DATE '2017-04-01' AND logdate < DATE '2017-05-01')) INHERITS (p1);
create table p1_y201705 (CHECK( logdate >= DATE '2017-05-01' AND logdate < DATE '2017-06-01')) INHERITS (p1);
create table p1_y201706 (CHECK( logdate >= DATE '2017-06-01' AND logdate < DATE '2017-07-01')) INHERITS (p1);
create table p1_y201707 (CHECK( logdate >= DATE '2017-07-01' AND logdate < DATE '2017-08-01')) INHERITS (p1);
create table p1_y201708 (CHECK( logdate >= DATE '2017-08-01' AND logdate < DATE '2017-09-01')) INHERITS (p1);
create table p1_y201709 (CHECK( logdate >= DATE '2017-09-01' AND logdate < DATE '2017-10-01')) INHERITS (p1);
create table p1_y201710 (CHECK( logdate >= DATE '2017-10-01' AND logdate < DATE '2017-11-01')) INHERITS (p1);
create table p1_y201711 (CHECK( logdate >= DATE '2017-11-01' AND logdate < DATE '2017-12-01')) INHERITS (p1);
create table p1_y201712 (CHECK( logdate >= DATE '2017-12-01' AND logdate < DATE '2018-01-01')) INHERITS (p1);

create table p2_y201701 (CHECK( logdate >= DATE '2017-01-01' AND logdate < DATE '2017-02-01')) INHERITS (p2);
create table p2_y201702 (CHECK( logdate >= DATE '2017-02-01' AND logdate < DATE '2017-03-01')) INHERITS (p2);
create table p2_y201703 (CHECK( logdate >= DATE '2017-03-01' AND logdate < DATE '2017-04-01')) INHERITS (p2);
create table p2_y201704 (CHECK( logdate >= DATE '2017-04-01' AND logdate < DATE '2017-05-01')) INHERITS (p2);
create table p2_y201705 (CHECK( logdate >= DATE '2017-05-01' AND logdate < DATE '2017-06-01')) INHERITS (p2);
create table p2_y201706 (CHECK( logdate >= DATE '2017-06-01' AND logdate < DATE '2017-07-01')) INHERITS (p2);
create table p2_y201707 (CHECK( logdate >= DATE '2017-07-01' AND logdate < DATE '2017-08-01')) INHERITS (p2);
create table p2_y201708 (CHECK( logdate >= DATE '2017-08-01' AND logdate < DATE '2017-09-01')) INHERITS (p2);
create table p2_y201709 (CHECK( logdate >= DATE '2017-09-01' AND logdate < DATE '2017-10-01')) INHERITS (p2);
create table p2_y201710 (CHECK( logdate >= DATE '2017-10-01' AND logdate < DATE '2017-11-01')) INHERITS (p2);
create table p2_y201711 (CHECK( logdate >= DATE '2017-11-01' AND logdate < DATE '2017-12-01')) INHERITS (p2);
create table p2_y201712 (CHECK( logdate >= DATE '2017-12-01' AND logdate < DATE '2018-01-01')) INHERITS (p2);

-- p1 파티션용 트리거 함수 생성 

CREATE OR REPLACE FUNCTION p1_insert_trigger()
RETURNS TRIGGER AS $$
BEGIN

    IF    ( NEW.logdate >= DATE '2017-01-01' AND NEW.logdate < DATE '2017-02-01') THEN INSERT INTO p1_y201701 VALUES (NEW.*);
    ELSIF ( NEW.logdate >= DATE '2017-02-01' AND NEW.logdate < DATE '2017-03-01') THEN INSERT INTO p1_y201702 VALUES (NEW.*);
    ELSIF ( NEW.logdate >= DATE '2017-03-01' AND NEW.logdate < DATE '2017-04-01') THEN INSERT INTO p1_y201703 VALUES (NEW.*);
    ELSIF ( NEW.logdate >= DATE '2017-04-01' AND NEW.logdate < DATE '2017-05-01') THEN INSERT INTO p1_y201704 VALUES (NEW.*);
    ELSIF ( NEW.logdate >= DATE '2017-05-01' AND NEW.logdate < DATE '2017-06-01') THEN INSERT INTO p1_y201705 VALUES (NEW.*);
    ELSIF ( NEW.logdate >= DATE '2017-06-01' AND NEW.logdate < DATE '2017-07-01') THEN INSERT INTO p1_y201706 VALUES (NEW.*);
    ELSIF ( NEW.logdate >= DATE '2017-07-01' AND NEW.logdate < DATE '2017-08-01') THEN INSERT INTO p1_y201707 VALUES (NEW.*);
    ELSIF ( NEW.logdate >= DATE '2017-08-01' AND NEW.logdate < DATE '2017-09-01') THEN INSERT INTO p1_y201708 VALUES (NEW.*);
    ELSIF ( NEW.logdate >= DATE '2017-09-01' AND NEW.logdate < DATE '2017-10-01') THEN INSERT INTO p1_y201709 VALUES (NEW.*);
    ELSIF ( NEW.logdate >= DATE '2017-10-01' AND NEW.logdate < DATE '2017-11-01') THEN INSERT INTO p1_y201710 VALUES (NEW.*);
    ELSIF ( NEW.logdate >= DATE '2017-11-01' AND NEW.logdate < DATE '2017-12-01') THEN INSERT INTO p1_y201711 VALUES (NEW.*);
    ELSIF ( NEW.logdate >= DATE '2017-12-01' AND NEW.logdate < DATE '2018-01-01') THEN INSERT INTO p1_y201712 VALUES (NEW.*);
    ELSE
        RAISE EXCEPTION 'Date out of range.  Fix the p1_insert_trigger() function!';
    END IF;
    RETURN NULL;
END;
$$
LANGUAGE plpgsql;

-- p2 파티션용 트리거 함수 생성 

CREATE OR REPLACE FUNCTION p2_insert_trigger()
RETURNS TRIGGER AS $$
BEGIN

    IF    ( NEW.logdate >= DATE '2017-01-01' AND NEW.logdate < DATE '2017-02-01') THEN INSERT INTO p2_y201701 VALUES (NEW.*);
    ELSIF ( NEW.logdate >= DATE '2017-02-01' AND NEW.logdate < DATE '2017-03-01') THEN INSERT INTO p2_y201702 VALUES (NEW.*);
    ELSIF ( NEW.logdate >= DATE '2017-03-01' AND NEW.logdate < DATE '2017-04-01') THEN INSERT INTO p2_y201703 VALUES (NEW.*);
    ELSIF ( NEW.logdate >= DATE '2017-04-01' AND NEW.logdate < DATE '2017-05-01') THEN INSERT INTO p2_y201704 VALUES (NEW.*);
    ELSIF ( NEW.logdate >= DATE '2017-05-01' AND NEW.logdate < DATE '2017-06-01') THEN INSERT INTO p2_y201705 VALUES (NEW.*);
    ELSIF ( NEW.logdate >= DATE '2017-06-01' AND NEW.logdate < DATE '2017-07-01') THEN INSERT INTO p2_y201706 VALUES (NEW.*);
    ELSIF ( NEW.logdate >= DATE '2017-07-01' AND NEW.logdate < DATE '2017-08-01') THEN INSERT INTO p2_y201707 VALUES (NEW.*);
    ELSIF ( NEW.logdate >= DATE '2017-08-01' AND NEW.logdate < DATE '2017-09-01') THEN INSERT INTO p2_y201708 VALUES (NEW.*);
    ELSIF ( NEW.logdate >= DATE '2017-09-01' AND NEW.logdate < DATE '2017-10-01') THEN INSERT INTO p2_y201709 VALUES (NEW.*);
    ELSIF ( NEW.logdate >= DATE '2017-10-01' AND NEW.logdate < DATE '2017-11-01') THEN INSERT INTO p2_y201710 VALUES (NEW.*);
    ELSIF ( NEW.logdate >= DATE '2017-11-01' AND NEW.logdate < DATE '2017-12-01') THEN INSERT INTO p2_y201711 VALUES (NEW.*);
    ELSIF ( NEW.logdate >= DATE '2017-12-01' AND NEW.logdate < DATE '2018-01-01') THEN INSERT INTO p2_y201712 VALUES (NEW.*);
    ELSE
        RAISE EXCEPTION 'Date out of range.  Fix the p2_insert_trigger() function!';
    END IF;
    RETURN NULL;
END;
$$
LANGUAGE plpgsql;
 
-- p1 파티션용 INSERT 트리거 생성

CREATE TRIGGER insert_p1_trigger
    BEFORE INSERT ON p1
    FOR EACH ROW EXECUTE PROCEDURE p1_insert_trigger();
	
-- p2 파티션용 INSERT 트리거 생성

CREATE TRIGGER insert_p2_trigger
    BEFORE INSERT ON p2
    FOR EACH ROW EXECUTE PROCEDURE p2_insert_trigger();
	
-- p1 테이블에 일자별로 만건 입력 

do $$
begin
  for i in 1..365 loop 
     for j in 1..10000 loop 
	    insert into p1 values((i-1)*10000+j, to_date('20170101','YYYYMMDD')+i-1, 'dummy');
	 end loop;
  end loop;
end $$;
	
-- p2 테이블에 일자별로 만건 입력

do $$
begin
  for i in 1..365 loop 
     for j in 1..10000 loop 
	    insert into p2 values((i-1)*10000+j, to_date('20170101','YYYYMMDD')+i-1, j, 'dummy');
	 end loop;
  end loop;
end $$;

 

 

통계 정보 생성 및 크기 확인

 

월별 파티션 테이블에 제대로 데이터가 입력됐는지 확인합니다.

 

analyze p1;
analyze p2;

analyze p1_y201701; 
analyze p1_y201702;
analyze p1_y201703;
analyze p1_y201704;
analyze p1_y201705;
analyze p1_y201706;
analyze p1_y201707;
analyze p1_y201708;
analyze p1_y201709;
analyze p1_y201710;
analyze p1_y201711;
analyze p1_y201712;

analyze p2_y201701; 
analyze p2_y201702;
analyze p2_y201703;
analyze p2_y201704;
analyze p2_y201705;
analyze p2_y201706;
analyze p2_y201707;
analyze p2_y201708;
analyze p2_y201709;
analyze p2_y201710;
analyze p2_y201711;
analyze p2_y201712;

select relname, relpages, reltuples::integer 
from   pg_class
where  relname like 'p1%' or
       relname like 'p2%' 
order by 1;
  relname   | relpages | reltuples
------------+----------+-----------
 p1         |        0 |         0
 p1_y201701 |     1975 |    310000
 p1_y201702 |     1784 |    280000
 p1_y201703 |     1975 |    310000
 p1_y201704 |     1911 |    300000
 p1_y201705 |     1975 |    310000
 p1_y201706 |     1911 |    300000
 p1_y201707 |     1975 |    310000
 p1_y201708 |     1975 |    310000
 p1_y201709 |     1911 |    300000
 p1_y201710 |     1975 |    310000
 p1_y201711 |     1911 |    300000
 p1_y201712 |     1975 |    310000
 p2         |        0 |         0
 p2_y201701 |     1975 |    310000
 p2_y201702 |     1784 |    280000
 p2_y201703 |     1975 |    310000
 p2_y201704 |     1911 |    300000
 p2_y201705 |     1975 |    310000
 p2_y201706 |     1911 |    300000
 p2_y201707 |     1975 |    310000
 p2_y201708 |     1975 |    310000
 p2_y201709 |     1911 |    300000
 p2_y201710 |     1975 |    310000
 p2_y201711 |     1911 |    300000
 p2_y201712 |     1975 |    310000

 

 

입력 조건이 1개의 테이블에만 있는 경우

 

아래와 같이 P1 테이블에 대해서만 날자 조건을 입력하면, P1 테이블에 대한 파티션 Pruning은 수행되지만 P2 테이블에 대한 파티션 Pruing은 수행되지 않습니다.

 

즉, 조인 Pruning을 제공하지 않으므로 P2 테이블에 딸린 모든 파티션을 Seq Scan하게 됩니다.

 

explain (analyze, buffers)
select count(*)
from   p1 a, p2 b
where  a.logdate = b.logdate 
and    a.c1      = b.c1
and    a.logdate between DATE '2017-05-01'
                     and DATE '2017-05-31';
                                                                  QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------
 Aggregate  (cost=129351.64..129351.65 rows=1 width=8) (actual time=3230.240..3230.240 rows=1 loops=1)
   Buffers: shared hit=25228, temp read=10163 written=10157
   ->  Hash Join  (cost=12486.01..129349.51 rows=849 width=0) (actual time=1027.855..3191.489 rows=310000 loops=1)
         Hash Cond: ((b.logdate = a.logdate) AND (b.c1 = a.c1))
         Buffers: shared hit=25228, temp read=10163 written=10157
         ->  Append  (cost=0.00..59753.00 rows=3650001 width=8) (actual time=0.008..1308.494 rows=3650000 loops=1)
               Buffers: shared hit=23253
               ->  Seq Scan on p2 b  (cost=0.00..0.00 rows=1 width=8) (actual time=0.002..0.002 rows=0 loops=1)
               ->  Seq Scan on p2_y201701 b_1  (cost=0.00..5075.00 rows=310000 width=8) (actual time=0.006..55.073 rows=310000 loops=1)
                     Buffers: shared hit=1975
               ->  Seq Scan on p2_y201702 b_2  (cost=0.00..4584.00 rows=280000 width=8) (actual time=0.016..50.464 rows=280000 loops=1)
                     Buffers: shared hit=1784
               ->  Seq Scan on p2_y201703 b_3  (cost=0.00..5075.00 rows=310000 width=8) (actual time=0.030..54.888 rows=310000 loops=1)
                     Buffers: shared hit=1975
               ->  Seq Scan on p2_y201704 b_4  (cost=0.00..4911.00 rows=300000 width=8) (actual time=0.010..58.291 rows=300000 loops=1)
                     Buffers: shared hit=1911
               ->  Seq Scan on p2_y201705 b_5  (cost=0.00..5075.00 rows=310000 width=8) (actual time=0.007..57.236 rows=310000 loops=1)
                     Buffers: shared hit=1975
               ->  Seq Scan on p2_y201706 b_6  (cost=0.00..4911.00 rows=300000 width=8) (actual time=0.010..54.321 rows=300000 loops=1)
                     Buffers: shared hit=1911
               ->  Seq Scan on p2_y201707 b_7  (cost=0.00..5075.00 rows=310000 width=8) (actual time=0.015..59.018 rows=310000 loops=1)
                     Buffers: shared hit=1975
               ->  Seq Scan on p2_y201708 b_8  (cost=0.00..5075.00 rows=310000 width=8) (actual time=0.007..56.012 rows=310000 loops=1)
                     Buffers: shared hit=1975
               ->  Seq Scan on p2_y201709 b_9  (cost=0.00..4911.00 rows=300000 width=8) (actual time=0.015..53.940 rows=300000 loops=1)
                     Buffers: shared hit=1911
               ->  Seq Scan on p2_y201710 b_10  (cost=0.00..5075.00 rows=310000 width=8) (actual time=0.010..55.760 rows=310000 loops=1)
                     Buffers: shared hit=1975
               ->  Seq Scan on p2_y201711 b_11  (cost=0.00..4911.00 rows=300000 width=8) (actual time=0.009..53.782 rows=300000 loops=1)
                     Buffers: shared hit=1911
               ->  Seq Scan on p2_y201712 b_12  (cost=0.00..5075.00 rows=310000 width=8) (actual time=0.007..57.858 rows=310000 loops=1)
                     Buffers: shared hit=1975
         ->  Hash  (cost=6625.00..6625.00 rows=310001 width=8) (actual time=244.772..244.772 rows=310000 loops=1)
               Buckets: 131072  Batches: 4  Memory Usage: 4055kB
               Buffers: shared hit=1975, temp written=793
               ->  Append  (cost=0.00..6625.00 rows=310001 width=8) (actual time=0.008..149.727 rows=310000 loops=1)
                     Buffers: shared hit=1975
                     ->  Seq Scan on p1 a  (cost=0.00..0.00 rows=1 width=8) (actual time=0.001..0.001 rows=0 loops=1)
                           Filter: ((logdate >= '2017-05-01'::date) AND (logdate <= '2017-05-31'::date))
                     ->  Seq Scan on p1_y201705 a_1  (cost=0.00..6625.00 rows=310000 width=8) (actual time=0.006..93.462 rows=310000 loops=1)
                           Filter: ((logdate >= '2017-05-01'::date) AND (logdate <= '2017-05-31'::date))
                           Buffers: shared hit=1975
 Planning time: 0.440 ms
 Execution time: 3230.319 ms

 

입력 조건을 추가한 경우


위와 같은 문제점때문에, 쿼리 작성 시에는 아래와 같이 모든 테이블에 상수 조건을 입력해야 합니다.

수행 결과를 보면, P1 테이블에서 5월 파티션, P2 테이블에서 5월 파티션만 액세스한 것을 알 수 있습니다.

결과적으로 수행속도도 매우 빠르게 개선되었습니다.

 

explain (analyze, buffers)
select count(*)
from   p1 a, p2 b
where  a.logdate = b.logdate 
and    a.c1      = b.c1
and    a.logdate between DATE '2017-05-01'
                     and DATE '2017-05-31'
and    b.logdate between DATE '2017-05-01'
                     and DATE '2017-05-31';
                                                                  QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------
 Aggregate  (cost=25079.64..25079.65 rows=1 width=8) (actual time=721.734..721.734 rows=1 loops=1)
   Buffers: shared hit=3950, temp read=1598 written=1592
   ->  Hash Join  (cost=12486.01..25077.51 rows=849 width=0) (actual time=219.268..677.870 rows=310000 loops=1)
         Hash Cond: ((a.logdate = b.logdate) AND (a.c1 = b.c1))
         Buffers: shared hit=3950, temp read=1598 written=1592
         ->  Append  (cost=0.00..6625.00 rows=310001 width=8) (actual time=0.011..129.327 rows=310000 loops=1)
               Buffers: shared hit=1975
               ->  Seq Scan on p1 a  (cost=0.00..0.00 rows=1 width=8) (actual time=0.003..0.003 rows=0 loops=1)
                     Filter: ((logdate >= '2017-05-01'::date) AND (logdate <= '2017-05-31'::date))
               ->  Seq Scan on p1_y201705 a_1  (cost=0.00..6625.00 rows=310000 width=8) (actual time=0.006..75.549 rows=310000 loops=1)
                     Filter: ((logdate >= '2017-05-01'::date) AND (logdate <= '2017-05-31'::date))
                     Buffers: shared hit=1975
         ->  Hash  (cost=6625.00..6625.00 rows=310001 width=8) (actual time=219.036..219.036 rows=310000 loops=1)
               Buckets: 131072  Batches: 4  Memory Usage: 4055kB
               Buffers: shared hit=1975, temp written=793
               ->  Append  (cost=0.00..6625.00 rows=310001 width=8) (actual time=0.007..131.560 rows=310000 loops=1)
                     Buffers: shared hit=1975
                     ->  Seq Scan on p2 b  (cost=0.00..0.00 rows=1 width=8) (actual time=0.001..0.001 rows=0 loops=1)
                           Filter: ((logdate >= '2017-05-01'::date) AND (logdate <= '2017-05-31'::date))
                     ->  Seq Scan on p2_y201705 b_1  (cost=0.00..6625.00 rows=310000 width=8) (actual time=0.005..78.213 rows=310000 loops=1)
                           Filter: ((logdate >= '2017-05-01'::date) AND (logdate <= '2017-05-31'::date))
                           Buffers: shared hit=1975
 Planning time: 0.522 ms
 Execution time: 721.791 ms


 

저작자 표시 비영리 변경 금지
신고
크리에이티브 커먼즈 라이선스
Creative Commons License


 

티스토리 툴바