Having been involved in performance audits on systems of every size, from start-up sites hacked together overnight to ginormous applications built by world-recognized brands, I’ve seen a lot of interesting (and sometimes truly unique) performance issues at every level of the stack: code, architecture, databases (sometimes all of the above). But there are a few particular, very “Performance 101”, issues that (unfortunately) appear in a lot of code bases. In this talk I'll present the most common database-related performance bottlenecks that can happen in most applications.
2. who am I ?
• 20+ years since first print ‘Hello World!’;
• currently @ OmniTI
• development and operations of large web applications
• performance & scalability
• we are hiring:
• https://omniti.com/is/hiring
5. how database connection works
① establish connection
② send query
③ process query
④ send result
⑤ close connection
6. common database connection
# The full life cycle of one query: every step below is paid again each
# time this sequence is repeated.
# establish connection
my $dbh = DBI->connect(…);
# prepare the query
my $sth = $dbh->prepare($query);
# execute it
$sth->execute();
# fetch one result row as a hash reference
my $result = $sth->fetchrow_hashref();
# close connection
$dbh->disconnect;
7. common database connection
① my $dbh = DBI->connect(…);
② my $sth = $dbh->prepare($query);
③ $sth->execute();
④ my $result = $sth->fetchrow_hashref();
⑤ $dbh->disconnect;
11. example
# Anti-pattern, kept on purpose for this slide: a brand-new connection is
# opened, used for a single query and closed again on every iteration.
my @data_array;
# Iterate over ids 1..10 explicitly. An uninitialized counter is undef on
# the first pass, and DBI binds undef as NULL, so "id = ?" matches nothing.
for my $i (1 .. 10) {
    my $dbh = DBI->connect(…);
    my $sth = $dbh->prepare(qq{select * from foo
where id = ?});
    $sth->execute($i);
    # One row per id; pushes undef when no row matches.
    push @data_array, $sth->fetchrow_hashref();
    $dbh->disconnect;
}
16. correct example
# Connect and prepare once, then re-execute the same statement handle for
# every id; only the bind value changes between iterations.
my @data_array;
my $dbh = DBI->connect(…);
my $sth = $dbh->prepare(qq{select * from foo
where id = ?});
# Iterate over ids 1..10 explicitly. An uninitialized counter is undef on
# the first pass, and DBI binds undef as NULL, so "id = ?" matches nothing.
for my $i (1 .. 10) {
    $sth->execute($i);
    # One row per id; pushes undef when no row matches.
    push @data_array, $sth->fetchrow_hashref();
}
$dbh->disconnect;
20. n+1
# get a list of items
my %item_list;
my $sth = $dbh->prepare(qq{select id from items
where active = true});
$sth->execute();
# get properties for each item -- this is the n+1 anti-pattern the slide
# illustrates: one extra query for every row returned by the query above.
# $row has to be a scalar: "my @row = ..." in a while condition counts the
# single returned element (even undef), so the loop would never end.
while ( my $row = $sth->fetchrow_hashref() ) {
    my $sth_prop = $dbh->prepare(qq{select * from
item_properties where item_id = ?});
    $sth_prop->execute($row->{'id'});
    # fetchall_hashref() requires a key-field argument; fetchall_arrayref({})
    # returns every property row as a hashref without assuming a key column.
    $item_list{$row->{'id'}}{'props'} =
        $sth_prop->fetchall_arrayref({});
}
21. n+1 you don’t know about
# fetch the ids of all active items
my @ids = get_active_item_ids();
my @item_props;
# build one Item per id and collect whatever properties() returns for it
# NOTE(review): presumably each properties() call runs its own query, which
# would make this the same n+1 pattern hidden behind the object layer -- confirm.
for my $id (@ids) {
    my @props = Item->new($id)->properties();
    push @item_props, @props;
}
22. easy solution
# get a list of items with properties, in a single query
# NOTE(review): assumes items.id and item_properties.item_id, the columns
# used by the n+1 example -- confirm against the real schema.
my %item_list;
my $sth = $dbh->prepare(qq{select p.*
                             from items i
                             join item_properties p on p.item_id = i.id
                            where i.active = true});
$sth->execute();
# arrange object to your liking
while (my $row = $sth->fetchrow_hashref()) {
    # Key on the column the query actually returns.
    my $id = $row->{'item_id'};
    $item_list{$id}{'id'} = $id;
    # An item can have several property rows: collect them instead of
    # overwriting, and copy the row so each entry owns its own hash.
    # (delete item_id from the copy here if props should not repeat it)
    push @{ $item_list{$id}{'props'} }, { %{$row} };
}
25. # create temp table naughty_users
# and get data from it
-- naughty_users is a common table expression: a named subquery that is
-- visible only to the statement it is attached to.
with naughty_users as (
select * from users where banned = 1
)
-- The outer query reads from the CTE as if it were a table.
select userid, email from naughty_users;
27. multiple queries are required
-- Three separate statements, each sent as its own query.
-- Statement 1 of 3: returns the generated userid.
# create user record
insert into users (name, email) values (?,?) returning
userid
-- Statement 2 of 3: returns the generated addressid.
-- NOTE(review): the userid placeholder is presumably bound to the value
-- returned by statement 1 -- confirm against the calling code.
# create address record
insert into addresses (userid, address, city, state,
zip) values (?,?,?,?,?) returning addressid
-- Statement 3 of 3: takes both a userid and an addressid as bind values.
# track changes to user information
insert into user_history (userid, addressid, action)
values (?,?,?) returning historyid
28. or are they?
-- One statement performs all three inserts. Each CTE wraps an insert and
-- exposes its RETURNING row(s) to the CTEs and the final select after it.
with userdata as (
-- create the user record; exposes the new userid
insert into users (name, email) values (?,?)
returning userid
), addressdata as (
-- create the address record, taking userid from userdata instead of a
-- bind value; exposes the new addressid
insert into addresses (userid, address, city, state, zip)
select userid,?,?,?,? from userdata
returning addressid
), historydata as (
-- track the change, taking both ids from the two CTEs above
insert into user_history (userid, addressid, action)
select userid, addressid,?
from userdata, addressdata
returning historyid
)
-- hand all three generated ids back to the caller in one result row
select userid, addressid, historyid
from userdata, addressdata, historydata;
29. why not use transactions?
• no complicated transaction code
• no complicated error handling code
• reduced query overhead
• better performance
30. find out more
For more details:
http://omniti.com/seeds/writable-ctes-improve-performance
36. would you trust junior with this?
select * from
(
select bannerid, caption, client_url, image_file, sponsorid, weight from
(
select V.bannerid, V.impressions, B.caption, B.client_url, B.image_file, s.sponsorid, s.weight,
row_number() over (partition by s.sponsorid order by s.weight desc) ranking
FROM
(
-- This level gives me a list of banners sorted by least seen,and then by highest weight
select valid.bannerid, valid.totalweight, count(I.timestamp) as impressions FROM
(
-- This level gets me a list of banners that are valid for display
select b.bannerid,
-- Add up the weight from 4 sources. Banner weight, and weight for each data item they match
decode( decode(bitand(u.STATE_BM1,b.STATE_BM1),0,0,1) +
decode(bitand(u.STATE_BM2,b.STATE_BM2),0,0,1) +
decode(bitand(u.STATE_BM3,b.STATE_BM3),0,0,1),0,0,b.STATE_WT
) +
decode(bitand(u.AGE_BM,b.AGE_BM),0,0,b.AGE_WT)+
decode(bitand(u.GENDER_BM,b.GENDER_BM),0,0,b.GENDER_WT)+
b.weight as totalweight
from tgif.tbl_users u, tgif.tbl_banners b, tgif.tbl_bannerstats bs
where
-- I only care about ME!
u.userid= 1
-- Don't show inactive banners
and b.inactive != 1
-- Only show banners that are currently running
and sysdate < b.end_date and sysdate >=b.start_date
-- Only get the type of banner i'm looking for
and b.type= 3
-- Join on the total stats, and only display banners that haven't reached their per banner maximums
and b.bannerid = bs.bannerid
and ( b.max_impressions IS NULL OR bs.total_impressions < b.max_impressions )
and ( b.max_clicks IS NULL OR bs.total_clicks < b.max_clicks )
and ( b.max_conversions IS NULL OR bs.total_conversions < b.max_conversions )
-- Ignore any banners that don't match their demographics (ie, male banner won't go to females)
and ( b.AGE_BM IS NULL OR b.AGE_BM = 0 OR bitand(u.AGE_BM, b.AGE_BM) != 0 )
and ( b.GENDER_BM IS NULL OR b.GENDER_BM =0 OR bitand(u.GENDER_BM, b.GENDER_BM) != 0 )
and ( b.STATE_BM1 IS NULL OR b.STATE_BM1 =0 OR bitand(u.STATE_BM1, b.STATE_BM1) != 0 )
and ( b.STATE_BM2 IS NULL OR b.STATE_BM2 =0 OR bitand(u.STATE_BM2, b.STATE_BM2) != 0 )
and ( b.STATE_BM3 IS NULL OR b.STATE_BM3 =0 OR bitand(u.STATE_BM3, b.STATE_BM3) != 0 )
-- But don't show me any banners that I have already signed up
and b.bannerid NOT IN (
SELECT B.bannerid FROM tgif.tbl_bannerconversions C, tgif.tbl_banners B, tgif.tbl_sponsors sp
WHERE C.USERID=1
AND C.bannerid=B.bannerid
AND B.sponsorid=sp.sponsorid
-- unless they have a conversion interval, and that interval has expired
AND ( sp.conversion_interval = 0 OR sysdate > C.timestamp+sp.conversion_interval )
)
-- Don't show me any banners that have SPONSORS that have reached their maximums
and b.sponsorid NOT IN (
-- I believe this would be better done using HAVING clauses, but I can't figure it out
-- Take the banners for a sponsor in the bannerstats table, and get the totals per sponsor
-- return anything that has reached it's maximum
select sponsorid FROM
(
SELECT S.sponsorid, S.max_impressions, S.max_conversions, S.max_clicks,
sum(total_impressions) as imps, sum(total_conversions) as convs,
sum(total_clicks) as clicks
FROM tgif.tbl_sponsors S, tgif.tbl_banners B, tgif.tbl_bannerstats bs
WHERE S.sponsorid=B.sponsorid
AND B.bannerid=bs.bannerid
GROUP BY S.Sponsorid, S.max_impressions, S.max_conversions, S.max_clicks
) exclude
WHERE ( imps > max_impressions OR convs >= max_conversions OR clicks > max_clicks )
)
) valid, tgif.tbl_bannerimpressions I
where
valid.bannerid=I.bannerid(+)
and I.userid(+)=1
group by valid.bannerid, valid.totalweight
-- I want to see banners I haven't seen yet, sorted by highest weight, so we sort by number
-- of times that this user has seen this particular banner, then we sort by weight
order by impressions, totalweight DESC
) V, tgif.tbl_banners B, tgif.tbl_sponsors S
where B.bannerid=V.bannerid
and B.sponsorid=S.sponsorid
and S.inactive != 1
and s.sponsorid not in (
-- Check the user impression cap to make sure it hasn't been passed by the user
select s.sponsorid from tgif.tbl_banners b, tgif.tbl_sponsors s,
tgif.TBL_BANNERIMPRESSIONS i
where s.sponsorid = b.sponsorid
and b.bannerid = i.bannerid
and i.timestamp >= sysdate - nvl(user_impression_cap_days,100)
and userid = 1
group by s.sponsorid
having count(*) >= max(nvl(user_impression_cap,1000000000))
)
-- Make sure the sponsor is still in the valid table. This table is updated hourly
-- and contains the sponsors that have not gone over their sponsor level frequencies for
-- impressions/conversions/clicks
and s.sponsorid in (select sponsorid from tgif.tbl_active_sponsors)
)
where ranking=1
--Order the banners by sponsor weight, which is handled by the ranking
--order by S.weight
order by impressions, weight desc
)
where rownum <= 10;
38. object construction issue
METHOD REAL USER SYS PCPU
Base ORM 6.330 5.771 0.212 94.51
SQL without objects 0.664 0.274 0.120 59.35
SQL with ORM objects 6.354 5.797 0.197 94.34
43. illustrating wrong
# Anti-pattern, kept on purpose for this slide: every order is pulled into
# the application just to average the ten most recent ones.
# get all orders
my $sth = $dbh->prepare("select order_id, price from orders");
$sth->execute();
# fetchall_arrayref({}) returns every row as a hashref; fetchrow_hashref
# would return only the first row.
my $orders = $sth->fetchall_arrayref({});
my $count = 0;
my $total = 0;
my $avg = 0;
# get average $ for last 10 orders
# Newest first: sort the rows by order_id, descending.
foreach my $o (sort { $b->{'order_id'} <=> $a->{'order_id'} } @{$orders}) {
    $total += $o->{'price'};
    $count++;
    last if $count == 10;
}
# Average whatever was summed, so fewer than 10 orders still yields a
# result; $avg stays 0 when there are no orders at all.
$avg = $total / $count if $count;
44. vs right
# get average $ for last 10 orders
# The database sorts, limits and averages; only one row comes back.
# The derived table needs an alias: PostgreSQL (before 16) and MySQL reject
# a subquery in FROM that has none.
$sth = $dbh->prepare(qq{select avg(price) as avg_price
                          from (select price from orders
                                 order by order_id desc limit 10) as last_orders});
$sth->execute();
$orders = $sth->fetchrow_hashref();
$avg = $orders->{'avg_price'};
45. database can do …
1. math
2. dates
3. aggregations
4. [partial] matches
5. much, much more