SlideShare une entreprise Scribd logo
1  sur  31
SQL
Adilson Mendonca
-- Revenue per catalog item (itemrevenue) and that revenue as a percentage of
-- the summed revenue of every item in the same class (revenueratio).
-- Only categories Jewelry, Sports and Books are kept, and only sales dated from
-- 2001-01-12 through 30 days later (BETWEEN includes both end points).
-- The first 100 rows by category, class, item id, description, ratio are returned.
-- NOTE(review): `interval 30 days` is not ANSI interval syntax; presumably this
-- targets a Spark SQL / Hive style engine - confirm.
select i_ITEM_id,i_item_desc,i_category,i_class,i_CURRENT_price, sum(cs_EXT_sales_price) as
itemrevenue, sum(cs_ext_sales_price) *100/sum(sum(cs_ext_sales_price)) over (partition by
I_class) as revenueratio
From catalog_sales,item,date_dim
where cs_item_sk = i_item_sk AND i_category in ('Jewelry', 'Sports', 'Books')
and cs_sold_date_sk = d_date_sk and cast(d_date as timestamp) between cast('2001-01-12' as
timestamp)
and (cast('2001-01-12' as timestamp) + interval 30 days)
group by i_item_id,i_item_desc,i_category,i_class,i_current_price
order by i_category,i_class,i_item_id,i_item_desc,revenueratio
limit 100;
Code Visibility
-- Revenue per catalog item and its percentage share of its class revenue,
-- for three categories over the 30 days starting 2001-01-12; first 100 rows.
select
i_ITEM_id,
I_item_desc,
I_category,
I_class,
i_CURRENT_price,
sum(cs_EXT_sales_price) as itemrevenue,
-- inner sum = this item's group total; the windowed outer sum adds those
-- totals over all grouped rows that share the same class
sum(cs_ext_sales_price) *100/sum(sum(cs_ext_sales_price))
over (partition by I_class) as revenueratio
-- comma-separated tables: the join conditions live in the WHERE clause below
FROM catalog_sales,item,date_dim
where cs_item_sk = i_item_sk
AND i_category in ('Jewelry', 'Sports', 'Books')
and cs_sold_date_sk = d_date_sk
-- BETWEEN includes both end points of the 30-day window
and cast(d_date as timestamp) between cast('2001-01-12' as timestamp)
and (cast('2001-01-12' as timestamp) + interval 30 days)
group by i_item_id,i_item_desc,i_category,i_class,i_current_price
order by i_category,i_class,i_item_id,i_item_desc,revenueratio
limit 100;
-- Revenue per catalog item and its percentage share of its class revenue,
-- for three categories over the 30 days starting 2001-01-12; first 100 rows.
select i_ITEM_id,
I_item_desc,
I_category,
I_class,
i_CURRENT_price,
sum(cs_EXT_sales_price) as itemrevenue,
-- inner sum = this item's group total; the windowed outer sum adds those
-- totals over all grouped rows that share the same class
sum(cs_ext_sales_price) *100/sum(sum(cs_ext_sales_price))
over (partition by I_class) as revenueratio
-- comma-separated tables: the join conditions live in the WHERE clause below
FROM catalog_sales,item,date_dim
where cs_item_sk = i_item_sk
AND i_category in ('Jewelry', 'Sports', 'Books')
and cs_sold_date_sk = d_date_sk
-- BETWEEN includes both end points of the 30-day window
and cast(d_date as timestamp)
between cast('2001-01-12' as timestamp)
and (cast('2001-01-12' as timestamp) + interval 30 days)
-- every non-aggregated select column is listed in the grouping
group by i_item_id,
I_item_desc,
I_category,
I_class,
i_current_price
order by i_category,
I_class,
I_item_id,
I_item_desc,
revenueratio
limit 100;
-- Revenue per catalog item and each item's percentage share of its class
-- revenue, for categories Jewelry, Sports and Books, over sales dated from
-- 2001-01-12 through 30 days later. Returns the first 100 rows.
SELECT i_item_id
      ,i_item_desc
      ,i_category
      ,i_class
      ,i_current_price
      ,SUM(cs_ext_sales_price) AS item_revenue
      -- Inner SUM is the item's group total; the windowed outer SUM adds those
      -- totals over all grouped rows that share the same class.
      ,SUM(cs_ext_sales_price) * 100
         / SUM(SUM(cs_ext_sales_price))
             OVER (PARTITION BY i_class) AS revenue_ratio
  FROM catalog_sales
  JOIN item ON cs_item_sk = i_item_sk
  JOIN date_dim ON cs_sold_date_sk = d_date_sk
 WHERE i_category IN ('Jewelry', 'Sports', 'Books')
   -- BETWEEN includes both end points of the 30-day window.
   AND CAST(d_date AS TIMESTAMP)
       BETWEEN CAST('2001-01-12' AS TIMESTAMP)
           AND CAST('2001-01-12' AS TIMESTAMP) + INTERVAL 30 DAYS
 GROUP BY 1, 2, 3, 4, 5
 -- category, class, item id, then description and ratio as tie-breakers so
 -- LIMIT is less arbitrary among rows sharing the first three keys.
 ORDER BY 3, 4, 1, 2, 7
 LIMIT 100;
-- Revenue per catalog item and each item's percentage share of its class
-- revenue, for categories Jewelry, Sports and Books, over sales dated from
-- 2001-01-12 through 30 days later. Returns the first 100 rows.
-- Every column is qualified with its table so its origin is visible.
SELECT item.i_item_id,
       item.i_item_desc,
       item.i_category,
       item.i_class,
       item.i_current_price,
       SUM(catalog_sales.cs_ext_sales_price) AS item_revenue,
       -- Inner SUM is the item's group total; the windowed outer SUM adds
       -- those totals over all grouped rows that share the same class.
       SUM(catalog_sales.cs_ext_sales_price) * 100
         / SUM(SUM(catalog_sales.cs_ext_sales_price))
             OVER (PARTITION BY item.i_class) AS revenue_ratio
  FROM catalog_sales
  JOIN item
    ON catalog_sales.cs_item_sk = item.i_item_sk
  JOIN date_dim
    ON catalog_sales.cs_sold_date_sk = date_dim.d_date_sk
 WHERE item.i_category IN ('Jewelry', 'Sports', 'Books')
   -- BETWEEN includes both end points of the 30-day window.
   AND CAST(date_dim.d_date AS TIMESTAMP)
       BETWEEN CAST('2001-01-12' AS TIMESTAMP)
           AND CAST('2001-01-12' AS TIMESTAMP) + INTERVAL 30 DAYS
 -- Named keys instead of positions: reordering the select list cannot
 -- silently change the grouping or the sort.
 GROUP BY item.i_item_id,
          item.i_item_desc,
          item.i_category,
          item.i_class,
          item.i_current_price
 -- Description and ratio act as tie-breakers so LIMIT is less arbitrary
 -- among rows sharing category, class and item id.
 ORDER BY item.i_category,
          item.i_class,
          item.i_item_id,
          item.i_item_desc,
          revenue_ratio
 LIMIT 100;
Over commenting
-- return each distinct (id, name, open) combination once
SELECT DISTINCT t1.id , t1.name , t3.open
-- take the 3 rows of table1 with the most recent date
FROM (SELECT id , name FROM table1 ORDER BY date DESC LIMIT 3) AS t1
-- attach open from table2 rows with open = 1 and type = 2;
-- t1 rows without such a match are kept with open = NULL
LEFT JOIN ( SELECT open , name_id FROM table2 WHERE open=1 AND type=2 ) AS t3
ON t1.id = t3.name_id
-- order by name from A-Z
ORDER BY t1.name ASC;
Follow Patterns
Indentation
Don’t over comment - just clear code
Remove commented lines of code
Use aliases on all columns when joining
Know your PKs and Unique keys
Make future maintenance easy
Execution time
Select ONLY the tables/columns that are actually in USE
Code block
-- Counts the hospitals whose average drug dosage, taken over recent
-- Los Angeles registry rows, is above 1000.
WITH
-- Los Angeles registry rows whose last visit falls within the past 14 days.
-- A NULL last_visit is replaced by NOW() and therefore always qualifies.
patient_data AS
(SELECT patient_id,
        patient_name,
        hospital,
        drug_dosage
   FROM hospital_registry
  WHERE (COALESCE(last_visit, NOW()) > NOW() - INTERVAL '14 days')
    -- Single quotes: a double-quoted "Los Angeles" is an identifier
    -- (a column name), not a string, in standard SQL.
    AND city = 'Los Angeles'
),
-- Mean drug dosage per hospital over the rows selected above.
average_dosage AS
(SELECT hospital,
        AVG(drug_dosage) AS Average
   FROM patient_data
  GROUP BY hospital
)
SELECT count(hospital)
  FROM average_dosage
 -- average_dosage exposes only hospital and Average; drug_dosage is not
 -- visible at this level, so the filter must use the aggregate.
 WHERE Average > 1000;
Master the use of:
Functions
Window Functions (OLAP functions)
CTE - Common Table Expression
Views
UDF - User Defined Function
Lost
Data Structure
Build an ERD if you don’t have one
Primary Keys
Unique Keys
Table Size
Number of columns (columnar databases)
KNOW YOUR DATA
Data Modelling
Modelling techniques
Transactional
3NF
Star Schema - Data Marts
Integration - Data Vault
Data Lake - Big Data
Flat tables
Data Lake or Data swamp
AVOID the journey to a SWAMP
Organize your data and contents
Use naming conventions - rules
Be aware of object creation
Document them in the same way
Verbose
Customer
id
name
date
Sales
id
date
Cust_id
amount
Customer
id_customer
name
date_of_birth
Sales
id_sales
sales_date
fk_customer
amount_inc_tax
-- Number of sales and total amount per customer and sales date, written
-- against the tersely named schema (customer: id, name, date;
-- sales: id, date, cust_id, amount).
SELECT c.name AS customer_name,
       c.date AS customer_dob,   -- customer.date holds the date of birth
       s.date AS sales_date,
       COUNT(1) AS no_of_sales,
       SUM(s.amount) AS amount
  FROM customer c
  JOIN sales s
    -- sales.id is the sale's own id; the customer reference is cust_id.
    ON c.id = s.cust_id
 -- Every non-aggregated select column has to be grouped.
 GROUP BY c.name,
          c.date,
          s.date;
-- Number of sales and total tax-inclusive amount per customer and sales date,
-- written against the descriptively named schema
-- (customer: id_customer, name, date_of_birth;
--  sales: id_sales, sales_date, fk_customer, amount_inc_tax).
SELECT c.name AS customer_name,
       c.date_of_birth,
       s.sales_date,
       COUNT(1) AS no_of_sales,
       SUM(s.amount_inc_tax) AS amount_inc_tax
  FROM customer c
  JOIN sales s
    ON c.id_customer = s.fk_customer
 -- Every non-aggregated select column has to be grouped.
 GROUP BY c.name,
          c.date_of_birth,
          s.sales_date;
Minimise usage of non-standard abbreviations
Don’t use too long names - you will need to type them one day
PK & FK should be a pattern
ID & table name, FK and link table
Maybe:
Data types definition on names like: price_amt, tax_pct
Know technologies
Know your databases & tools
Differences, limitations, strengths and
weaknesses
Columnar databases
Functionalities
Access & Security
Help make a better
world with
beautiful code!!!

Contenu connexe

Similaire à SQL coding at Sydney Measure Camp 2018

Oracle Advanced Dml
Oracle Advanced DmlOracle Advanced Dml
Oracle Advanced Dml
ssunka01
 
100 sample formulas_v6
100 sample formulas_v6100 sample formulas_v6
100 sample formulas_v6
artimaroo1
 
Below is my code- I have an error that I still have difficulty figurin.pdf
Below is my code- I have an error that I still have difficulty figurin.pdfBelow is my code- I have an error that I still have difficulty figurin.pdf
Below is my code- I have an error that I still have difficulty figurin.pdf
armanuelraj
 

Similaire à SQL coding at Sydney Measure Camp 2018 (20)

Customer Clustering for Retailer Marketing
Customer Clustering for Retailer MarketingCustomer Clustering for Retailer Marketing
Customer Clustering for Retailer Marketing
 
James Colby Maddox Business Intellignece and Computer Science Portfolio
James Colby Maddox Business Intellignece and Computer Science PortfolioJames Colby Maddox Business Intellignece and Computer Science Portfolio
James Colby Maddox Business Intellignece and Computer Science Portfolio
 
Data Exploration with Apache Drill: Day 2
Data Exploration with Apache Drill: Day 2Data Exploration with Apache Drill: Day 2
Data Exploration with Apache Drill: Day 2
 
2018 03 27_biological_databases_part4_v_upload
2018 03 27_biological_databases_part4_v_upload2018 03 27_biological_databases_part4_v_upload
2018 03 27_biological_databases_part4_v_upload
 
Data Warehousing
Data WarehousingData Warehousing
Data Warehousing
 
Final Project SQL - Elyada Wigati Pramaresti.pptx
Final Project SQL - Elyada Wigati Pramaresti.pptxFinal Project SQL - Elyada Wigati Pramaresti.pptx
Final Project SQL - Elyada Wigati Pramaresti.pptx
 
Introtosqltuning
IntrotosqltuningIntrotosqltuning
Introtosqltuning
 
Meetup Beleza na Web - Funções analíticas com SQL
Meetup Beleza na Web - Funções analíticas com SQLMeetup Beleza na Web - Funções analíticas com SQL
Meetup Beleza na Web - Funções analíticas com SQL
 
Database Management System - SQL Advanced Training
Database Management System - SQL Advanced TrainingDatabase Management System - SQL Advanced Training
Database Management System - SQL Advanced Training
 
Company segmentation - an approach with R
Company segmentation - an approach with RCompany segmentation - an approach with R
Company segmentation - an approach with R
 
Sql wksht-3
Sql wksht-3Sql wksht-3
Sql wksht-3
 
Oracle Advanced Dml
Oracle Advanced DmlOracle Advanced Dml
Oracle Advanced Dml
 
100 sample formulas_v6
100 sample formulas_v6100 sample formulas_v6
100 sample formulas_v6
 
Useful date number and string sql queries
Useful date number and string sql queriesUseful date number and string sql queries
Useful date number and string sql queries
 
Data Warehouse Project
Data Warehouse ProjectData Warehouse Project
Data Warehouse Project
 
An introduction to Machine Learning
An introduction to Machine LearningAn introduction to Machine Learning
An introduction to Machine Learning
 
Building a data warehouse
Building a data warehouseBuilding a data warehouse
Building a data warehouse
 
Data Modeling in Looker
Data Modeling in LookerData Modeling in Looker
Data Modeling in Looker
 
Below is my code- I have an error that I still have difficulty figurin.pdf
Below is my code- I have an error that I still have difficulty figurin.pdfBelow is my code- I have an error that I still have difficulty figurin.pdf
Below is my code- I have an error that I still have difficulty figurin.pdf
 
Powering Heap With PostgreSQL And CitusDB (PGConf Silicon Valley 2015)
Powering Heap With PostgreSQL And CitusDB (PGConf Silicon Valley 2015)Powering Heap With PostgreSQL And CitusDB (PGConf Silicon Valley 2015)
Powering Heap With PostgreSQL And CitusDB (PGConf Silicon Valley 2015)
 

Dernier

Dernier (20)

Apidays New York 2024 - The Good, the Bad and the Governed by David O'Neill, ...
Apidays New York 2024 - The Good, the Bad and the Governed by David O'Neill, ...Apidays New York 2024 - The Good, the Bad and the Governed by David O'Neill, ...
Apidays New York 2024 - The Good, the Bad and the Governed by David O'Neill, ...
 
Real Time Object Detection Using Open CV
Real Time Object Detection Using Open CVReal Time Object Detection Using Open CV
Real Time Object Detection Using Open CV
 
Connector Corner: Accelerate revenue generation using UiPath API-centric busi...
Connector Corner: Accelerate revenue generation using UiPath API-centric busi...Connector Corner: Accelerate revenue generation using UiPath API-centric busi...
Connector Corner: Accelerate revenue generation using UiPath API-centric busi...
 
Artificial Intelligence Chap.5 : Uncertainty
Artificial Intelligence Chap.5 : UncertaintyArtificial Intelligence Chap.5 : Uncertainty
Artificial Intelligence Chap.5 : Uncertainty
 
AWS Community Day CPH - Three problems of Terraform
AWS Community Day CPH - Three problems of TerraformAWS Community Day CPH - Three problems of Terraform
AWS Community Day CPH - Three problems of Terraform
 
GenAI Risks & Security Meetup 01052024.pdf
GenAI Risks & Security Meetup 01052024.pdfGenAI Risks & Security Meetup 01052024.pdf
GenAI Risks & Security Meetup 01052024.pdf
 
2024: Domino Containers - The Next Step. News from the Domino Container commu...
2024: Domino Containers - The Next Step. News from the Domino Container commu...2024: Domino Containers - The Next Step. News from the Domino Container commu...
2024: Domino Containers - The Next Step. News from the Domino Container commu...
 
Mastering MySQL Database Architecture: Deep Dive into MySQL Shell and MySQL R...
Mastering MySQL Database Architecture: Deep Dive into MySQL Shell and MySQL R...Mastering MySQL Database Architecture: Deep Dive into MySQL Shell and MySQL R...
Mastering MySQL Database Architecture: Deep Dive into MySQL Shell and MySQL R...
 
MINDCTI Revenue Release Quarter One 2024
MINDCTI Revenue Release Quarter One 2024MINDCTI Revenue Release Quarter One 2024
MINDCTI Revenue Release Quarter One 2024
 
FWD Group - Insurer Innovation Award 2024
FWD Group - Insurer Innovation Award 2024FWD Group - Insurer Innovation Award 2024
FWD Group - Insurer Innovation Award 2024
 
Data Cloud, More than a CDP by Matt Robison
Data Cloud, More than a CDP by Matt RobisonData Cloud, More than a CDP by Matt Robison
Data Cloud, More than a CDP by Matt Robison
 
A Beginners Guide to Building a RAG App Using Open Source Milvus
A Beginners Guide to Building a RAG App Using Open Source MilvusA Beginners Guide to Building a RAG App Using Open Source Milvus
A Beginners Guide to Building a RAG App Using Open Source Milvus
 
Apidays New York 2024 - Accelerating FinTech Innovation by Vasa Krishnan, Fin...
Apidays New York 2024 - Accelerating FinTech Innovation by Vasa Krishnan, Fin...Apidays New York 2024 - Accelerating FinTech Innovation by Vasa Krishnan, Fin...
Apidays New York 2024 - Accelerating FinTech Innovation by Vasa Krishnan, Fin...
 
Manulife - Insurer Transformation Award 2024
Manulife - Insurer Transformation Award 2024Manulife - Insurer Transformation Award 2024
Manulife - Insurer Transformation Award 2024
 
AXA XL - Insurer Innovation Award Americas 2024
AXA XL - Insurer Innovation Award Americas 2024AXA XL - Insurer Innovation Award Americas 2024
AXA XL - Insurer Innovation Award Americas 2024
 
Corporate and higher education May webinar.pptx
Corporate and higher education May webinar.pptxCorporate and higher education May webinar.pptx
Corporate and higher education May webinar.pptx
 
TrustArc Webinar - Unlock the Power of AI-Driven Data Discovery
TrustArc Webinar - Unlock the Power of AI-Driven Data DiscoveryTrustArc Webinar - Unlock the Power of AI-Driven Data Discovery
TrustArc Webinar - Unlock the Power of AI-Driven Data Discovery
 
DBX First Quarter 2024 Investor Presentation
DBX First Quarter 2024 Investor PresentationDBX First Quarter 2024 Investor Presentation
DBX First Quarter 2024 Investor Presentation
 
"I see eyes in my soup": How Delivery Hero implemented the safety system for ...
"I see eyes in my soup": How Delivery Hero implemented the safety system for ..."I see eyes in my soup": How Delivery Hero implemented the safety system for ...
"I see eyes in my soup": How Delivery Hero implemented the safety system for ...
 
Strategies for Unlocking Knowledge Management in Microsoft 365 in the Copilot...
Strategies for Unlocking Knowledge Management in Microsoft 365 in the Copilot...Strategies for Unlocking Knowledge Management in Microsoft 365 in the Copilot...
Strategies for Unlocking Knowledge Management in Microsoft 365 in the Copilot...
 

SQL coding at Sydney Measure Camp 2018

  • 2.
  • 3. select i_ITEM_id,i_item_desc,i_category,i_class,i_CURRENT_price, sum(cs_EXT_sales_price) as itemrevenue, sum(cs_ext_sales_price) *100/sum(sum(cs_ext_sales_price)) over (partition by I_class) as revenueratio From catalog_sales,item,date_dim where cs_item_sk = i_item_sk AND i_category in ('Jewelry', 'Sports', 'Books') and cs_sold_date_sk = d_date_sk and cast(d_date as timestamp) between cast('2001-01-12' as timestamp) and (cast('2001-01-12' as timestamp) + interval 30 days) group by i_item_id,i_item_desc,i_category,i_class,i_current_price order by i_category,i_class,i_item_id,i_item_desc,revenueratio limit 100;
  • 5.
  • 6. select i_ITEM_id, I_item_desc, I_category, I_class, i_CURRENT_price, sum(cs_EXT_sales_price) as itemrevenue, sum(cs_ext_sales_price) *100/sum(sum(cs_ext_sales_price)) over (partition by I_class) as revenueratio FROM catalog_sales,item,date_dim where cs_item_sk = i_item_sk AND i_category in ('Jewelry', 'Sports', 'Books') and cs_sold_date_sk = d_date_sk and cast(d_date as timestamp) between cast('2001-01-12' as timestamp) and (cast('2001-01-12' as timestamp) + interval 30 days) group by i_item_id,i_item_desc,i_category,i_class,i_current_price order by i_category,i_class,i_item_id,i_item_desc,revenueratio limit 100;
  • 7. select i_ITEM_id, I_item_desc, I_category, I_class, i_CURRENT_price, sum(cs_EXT_sales_price) as itemrevenue, sum(cs_ext_sales_price) *100/sum(sum(cs_ext_sales_price)) over (partition by I_class) as revenueratio FROM catalog_sales,item,date_dim where cs_item_sk = i_item_sk AND i_category in ('Jewelry', 'Sports', 'Books') and cs_sold_date_sk = d_date_sk and cast(d_date as timestamp) between cast('2001-01-12' as timestamp) and (cast('2001-01-12' as timestamp) + interval 30 days) group by i_item_id, I_item_desc, I_category, I_class, i_current_price order by i_category, I_class, I_item_id, I_item_desc, revenueratio limit 100;
  • 8. SELECT i_item_id, ,i_item_desc ,i_category ,i_class ,i_current_price ,SUM(cs_ext_sales_price) AS item_revenue ,SUM(cs_ext_sales_price) * 100 / SUM(SUM(cs_ext_sales_price)) OVER (PARTITION BY i_class) AS revenue_ratio FROM catalog_sales JOIN item ON cs_item_sk = i_item_sk JOIN date_dim ON cs_sold_date_sk = d_date_sk WHERE i_category IN ('Jewelry', 'Sports', 'Books') AND CAST(d_date AS TIMESTAMP) BETWEEN CAST('2001-01-12' AS TIMESTAMP) AND CAST('2001-01-12' AS TIMESTAMP) + INTERVAL 30 DAYS GROUP BY 1, 2, 3, 4, 5 ORDER BY 3, 4, 1 LIMIT 100
  • 9. SELECT item.i_item_id, item.i_item_desc, item.i_category, item.i_class, item.i_current_price, SUM(catalog_sales.cs_ext_sales_price) AS item_revenue, SUM(catalog_sales.cs_ext_sales_price) * 100 / SUM(SUM(catalog_sales.cs_ext_sales_price)) OVER (PARTITION BY item.i_class) AS revenue_ratio FROM catalog_sales JOIN item ON catalog_sales.cs_item_sk = item.i_item_sk JOIN date_dim ON catalog_sales.cs_sold_date_sk = date_dim.d_date_sk WHERE item.i_category IN ('Jewelry', 'Sports', 'Books') AND CAST(date_dim.d_date AS TIMESTAMP) BETWEEN CAST('2001-01-12' AS TIMESTAMP) AND CAST('2001-01-12' AS TIMESTAMP) + INTERVAL 30 DAYS GROUP BY 1, 2, 3, 4, 5 ORDER BY 3, 4, 1 LIMIT 100
  • 10. Over commenting -- get id, name and open only once SELECT DISTINCT t1.id , t1.name , t3.open -- select names from table 1, order them by date and return first 3 FROM (SELECT id , name FROM table1 ORDER BY date DESC LIMIT 3) AS t1 -- get open from table 2 IF id is there, open = 1 and type =2 LEFT JOIN ( ( SELECT open , name_id FROM table2 WHERE open=1 AND type=2 ) AS t3 ) ON t1.id = t3.name_id -- order by name from A-Z ORDER BY t1.name ASC
  • 11.
  • 12.
  • 13. Follow Patterns Indentation Don’t over comment - just clear code Remove commented lines of code Use aliases on all columns when joining Know your PKs and Unique keys Make future maintenance easy Execution time Select ONLY the tables/columns that are actually in USE
  • 15.
  • 16. WITH patient_data AS (SELECT patient_id, patient_name, hospital, drug_dosage FROM hospital_registry WHERE (COALESCE(last_visit,NOW()) > NOW() - INTERVAL '14 days') AND city = "Los Angeles" ), average_dosage AS (SELECT hospital, AVG(drug_dosage) AS Average FROM patient_data GROUP BY hospital ) SELECT count(hospital) FROM average_dosage WHERE drug_dosage > 1000
  • 17. Master the use of: Functions Window Functions (OLAP functions) CTE - Common Table Expression Views UDF - User Defined Function
  • 18. Lost
  • 19.
  • 20. Data Structure Build an ERD if you don’t have one Primary Keys Unique Keys Table Size Number of columns (columnar databases) KNOW YOUR DATA
  • 22. Modelling techniques Transactional 3NF Star Schema - Data Marts Integration - Data Vault Data Lake - Big Data Flat tables
  • 23. Data Lake or Data swamp
  • 24. AVOID the journey to a SWAMP Organize your data and contents Use naming conventions - rules Be aware of object creation Document them in the same way
  • 26.
  • 27. Customer id name date Sales id date Cust_id amount Customer id_customer name date_of_birth Sales id_sales sales_date fk_customer amount_inc_tax SELECT c.name AS customer_name, c.date AS customer.dob, s.date AS sales_date, COUNT(1) AS no_of_sales, SUM(amount) AS amount FROM customer c JOIN sales s ON c.id = s.id SELECT c.name AS customer_name, c.date_of_birth, s.sales_date, COUNT(1) no_of_sales, SUM(amount_inc_tax) AS amount_inc_tax FROM customer c JOIN sales s ON c.id_customer = s.fk_customer
  • 28. Minimise usage of non-standard abbreviations Don’t use too long names - you will need to type them one day PK & FK should be a pattern ID & table name, FK and link table Maybe: Data types definition on names like: price_amt, tax_pct
  • 30. Know your databases & tools Differences, limitations, strengths and weaknesses Columnar databases Functionalities Access & Security
  • 31. Help make a better world with beautiful code!!!