SlideShare une entreprise Scribd logo
1  sur  20
Télécharger pour lire hors ligne
Introduction To Cache-Oblivious Algorithms
by Christopher Gilbert
http://www.twitter.com/bigdatadev
http://www.github.com/bigdatadev


http://www.cjgilbert.me
“A cache-oblivious algorithm is not oblivious to
cache memory”
(However, it is oblivious to the size of the cache)
“Cache-oblivious algorithms are
effective on any system, regardless
of memory hierarchy”
“Cache oblivious algorithms do not improve
complexity.”
(But they still improve performance)
Processor
Graphics
Core Core Core Core
Shared L3 Cache
Memory Controller I/O
L1 Cache
L2 Cache
L1 Cache
L2 Cache
L1 Cache
L2 Cache
L1 Cache
L2 Cache
System
Agent &
Memory
Controller
Register
L1 Cache
L2 Cache
L3 Cache
Memory
Disk
LatencyIncreases
CapacityDecreases
Tall Cache Assumption
Fully Associative
Perfect Eviction Policy
MT(N) = (N/B)
Estimating Memory Transfers
struct item_t {
	 uint64_t x;
	 uint64_t y;
};
bool
predicate(item_t const* i1, item_t const* i2) {
	 return ((i1->x + i2->y) * i2->x == (i1->y + i2->x) * i2->y);
}
size_t
kernel(item_t const* begin1, item_t const* end1,
item_t const* begin2, item_t const* end2) {
	 size_t count = 0;
	 for (item_t const* pos1 = begin1; pos1 != end1; pos1++) {
		for (item_t const* pos2 = begin2; pos2 != end2; pos2++) {
			if (predicate(pos1, pos2))
				count += 1;
		}
	 }
	 return count;
}
size_t
simple_parallel(item_t const* begin, size_t count,
unsigned thread_count) {
	 size_t res = 0;
#pragma omp parallel for reduction(+:res) num_threads(thread_count)
	 for (int i = 0; i < (int)count; i += 1)
		res += kernel(begin + i, begin + i + 1, begin, begin + count);
	 return res;
}
Memory Transfer Estimate
MT(N) = (N2
/B)
0
5000
10000
15000
20000
25000
30000
Number Of Threads
Time(ms)
Recursive Approach
class coba_task : public task {
public:
	 task* execute() {
		if (_count1 + _count2 > 256) {
			coba_task& a = *new(allocate_child()) coba_task(
				_begin1, _count1 / 2,
				_begin2, _count2 / 2
			);
			coba_task& b = *new(allocate_child()) coba_task(
				_begin1, _count1 / 2,
				_begin2 + _count2 / 2, _count2 - _count2 / 2
			);
			coba_task& c = *new(allocate_child()) coba_task(
				_begin1 + _count1 / 2, _count1 - _count1 / 2,
				_begin2 + _count2 / 2, _count2 - _count2 / 2
			);
			coba_task& d = *new(allocate_child()) coba_task(
				_begin1 + _count1 / 2, _count1 - _count1 / 2,
				_begin2, _count2 / 2
			);
			set_ref_count(5);
			spawn(b); spawn(c); spawn(d); spawn_and_wait_for_all(a);
			_result = a.result() + b.result() + c.result() + d.result();
		}
		else {
			_result = kernel(_begin1, _begin1 + _count1, _begin2, _begin2 + _count2);
		}
		return NULL;
	 }
};
size_t
recursive_parallel(item_t const* begin, size_t count,
unsigned thread_count) {
	 coba_task& a = *new(task::allocate_root()) coba_task(
		begin, count / 2,
		begin + count / 2, count / 2
	 );
	 task::spawn_root_and_wait(a);
	 return a.result();
}
Revised Memory Transfer Estimate
MT(N) = (N2
/CB)
0
5000
10000
15000
20000
25000
30000
Number Of Threads
Time(ms)
Exploit both spatial and temporal locality.
Use recursion.
Optimise your memory transfers.

Contenu connexe

Tendances

Parallel algorithms
Parallel algorithmsParallel algorithms
Parallel algorithmsguest084d20
 
Faster Practical Block Compression for Rank/Select Dictionaries
Faster Practical Block Compression for Rank/Select DictionariesFaster Practical Block Compression for Rank/Select Dictionaries
Faster Practical Block Compression for Rank/Select DictionariesRakuten Group, Inc.
 
El text.tokuron a(2019).jung190711
El text.tokuron a(2019).jung190711El text.tokuron a(2019).jung190711
El text.tokuron a(2019).jung190711RCCSRENKEI
 
Tpr star tree
Tpr star treeTpr star tree
Tpr star treeWin Yu
 
International Journal of Engineering Research and Development (IJERD)
International Journal of Engineering Research and Development (IJERD)International Journal of Engineering Research and Development (IJERD)
International Journal of Engineering Research and Development (IJERD)IJERD Editor
 
how to calclute time complexity of algortihm
how to calclute time complexity of algortihmhow to calclute time complexity of algortihm
how to calclute time complexity of algortihmSajid Marwat
 
Introduction For seq2seq(sequence to sequence) and RNN
Introduction For seq2seq(sequence to sequence) and RNNIntroduction For seq2seq(sequence to sequence) and RNN
Introduction For seq2seq(sequence to sequence) and RNNHye-min Ahn
 
EC202 SIGNALS & SYSTEMS PREVIOUS QUESTION PAPER
EC202 SIGNALS & SYSTEMS PREVIOUS QUESTION PAPEREC202 SIGNALS & SYSTEMS PREVIOUS QUESTION PAPER
EC202 SIGNALS & SYSTEMS PREVIOUS QUESTION PAPERVISHNUPRABHANKAIMAL
 
Fast Identification of Heavy Hitters by Cached and Packed Group Testing
Fast Identification of Heavy Hitters by Cached and Packed Group TestingFast Identification of Heavy Hitters by Cached and Packed Group Testing
Fast Identification of Heavy Hitters by Cached and Packed Group TestingRakuten Group, Inc.
 
SchNet: A continuous-filter convolutional neural network for modeling quantum...
SchNet: A continuous-filter convolutional neural network for modeling quantum...SchNet: A continuous-filter convolutional neural network for modeling quantum...
SchNet: A continuous-filter convolutional neural network for modeling quantum...Kazuki Fujikawa
 
Mapping Parallel Programs into Hierarchical Distributed Computer Systems
Mapping Parallel Programs into Hierarchical Distributed Computer SystemsMapping Parallel Programs into Hierarchical Distributed Computer Systems
Mapping Parallel Programs into Hierarchical Distributed Computer SystemsMikhail Kurnosov
 
18103010 algorithm complexity (iterative)
18103010 algorithm complexity (iterative)18103010 algorithm complexity (iterative)
18103010 algorithm complexity (iterative)AdityaKhandelwal58
 
Shor's discrete logarithm quantum algorithm for elliptic curves
 Shor's discrete logarithm quantum algorithm for elliptic curves Shor's discrete logarithm quantum algorithm for elliptic curves
Shor's discrete logarithm quantum algorithm for elliptic curvesXequeMateShannon
 
Jeff Johnson, Research Engineer, Facebook at MLconf NYC
Jeff Johnson, Research Engineer, Facebook at MLconf NYCJeff Johnson, Research Engineer, Facebook at MLconf NYC
Jeff Johnson, Research Engineer, Facebook at MLconf NYCMLconf
 

Tendances (20)

Parallel searching
Parallel searchingParallel searching
Parallel searching
 
Parallel algorithms
Parallel algorithmsParallel algorithms
Parallel algorithms
 
Faster Practical Block Compression for Rank/Select Dictionaries
Faster Practical Block Compression for Rank/Select DictionariesFaster Practical Block Compression for Rank/Select Dictionaries
Faster Practical Block Compression for Rank/Select Dictionaries
 
El text.tokuron a(2019).jung190711
El text.tokuron a(2019).jung190711El text.tokuron a(2019).jung190711
El text.tokuron a(2019).jung190711
 
Tpr star tree
Tpr star treeTpr star tree
Tpr star tree
 
International Journal of Engineering Research and Development (IJERD)
International Journal of Engineering Research and Development (IJERD)International Journal of Engineering Research and Development (IJERD)
International Journal of Engineering Research and Development (IJERD)
 
how to calclute time complexity of algortihm
how to calclute time complexity of algortihmhow to calclute time complexity of algortihm
how to calclute time complexity of algortihm
 
Introduction For seq2seq(sequence to sequence) and RNN
Introduction For seq2seq(sequence to sequence) and RNNIntroduction For seq2seq(sequence to sequence) and RNN
Introduction For seq2seq(sequence to sequence) and RNN
 
Complexity analysis in Algorithms
Complexity analysis in AlgorithmsComplexity analysis in Algorithms
Complexity analysis in Algorithms
 
EC202 SIGNALS & SYSTEMS PREVIOUS QUESTION PAPER
EC202 SIGNALS & SYSTEMS PREVIOUS QUESTION PAPEREC202 SIGNALS & SYSTEMS PREVIOUS QUESTION PAPER
EC202 SIGNALS & SYSTEMS PREVIOUS QUESTION PAPER
 
Fast Identification of Heavy Hitters by Cached and Packed Group Testing
Fast Identification of Heavy Hitters by Cached and Packed Group TestingFast Identification of Heavy Hitters by Cached and Packed Group Testing
Fast Identification of Heavy Hitters by Cached and Packed Group Testing
 
Tscd pweb
Tscd pwebTscd pweb
Tscd pweb
 
SchNet: A continuous-filter convolutional neural network for modeling quantum...
SchNet: A continuous-filter convolutional neural network for modeling quantum...SchNet: A continuous-filter convolutional neural network for modeling quantum...
SchNet: A continuous-filter convolutional neural network for modeling quantum...
 
Mapping Parallel Programs into Hierarchical Distributed Computer Systems
Mapping Parallel Programs into Hierarchical Distributed Computer SystemsMapping Parallel Programs into Hierarchical Distributed Computer Systems
Mapping Parallel Programs into Hierarchical Distributed Computer Systems
 
18103010 algorithm complexity (iterative)
18103010 algorithm complexity (iterative)18103010 algorithm complexity (iterative)
18103010 algorithm complexity (iterative)
 
1
11
1
 
Shor's discrete logarithm quantum algorithm for elliptic curves
 Shor's discrete logarithm quantum algorithm for elliptic curves Shor's discrete logarithm quantum algorithm for elliptic curves
Shor's discrete logarithm quantum algorithm for elliptic curves
 
Parallel Algorithms
Parallel AlgorithmsParallel Algorithms
Parallel Algorithms
 
Jeff Johnson, Research Engineer, Facebook at MLconf NYC
Jeff Johnson, Research Engineer, Facebook at MLconf NYCJeff Johnson, Research Engineer, Facebook at MLconf NYC
Jeff Johnson, Research Engineer, Facebook at MLconf NYC
 
Parallel computation
Parallel computationParallel computation
Parallel computation
 

Similaire à Introduction to Cache-Oblivious Algorithms

Algorithm analysis.pptx
Algorithm analysis.pptxAlgorithm analysis.pptx
Algorithm analysis.pptxDrBashirMSaad
 
ISCA Final Presentaiton - Compilations
ISCA Final Presentaiton -  CompilationsISCA Final Presentaiton -  Compilations
ISCA Final Presentaiton - CompilationsHSA Foundation
 
Apache Spark Internals - Part 2
Apache Spark Internals - Part 2Apache Spark Internals - Part 2
Apache Spark Internals - Part 2Jéferson Machado
 
COSCUP: Introduction to Julia
COSCUP: Introduction to JuliaCOSCUP: Introduction to Julia
COSCUP: Introduction to Julia岳華 杜
 
Task based Programming with OmpSs and its Application
Task based Programming with OmpSs and its ApplicationTask based Programming with OmpSs and its Application
Task based Programming with OmpSs and its ApplicationFacultad de Informática UCM
 
Quantization and Training of Neural Networks for Efficient Integer-Arithmetic...
Quantization and Training of Neural Networks for Efficient Integer-Arithmetic...Quantization and Training of Neural Networks for Efficient Integer-Arithmetic...
Quantization and Training of Neural Networks for Efficient Integer-Arithmetic...Ryo Takahashi
 
Parallel R in snow (english after 2nd slide)
Parallel R in snow (english after 2nd slide)Parallel R in snow (english after 2nd slide)
Parallel R in snow (english after 2nd slide)Cdiscount
 
Deuce STM - CMP'09
Deuce STM - CMP'09Deuce STM - CMP'09
Deuce STM - CMP'09Guy Korland
 
The art of reverse engineering flash exploits
The art of reverse engineering flash exploitsThe art of reverse engineering flash exploits
The art of reverse engineering flash exploitsPriyanka Aash
 
Skiena algorithm 2007 lecture15 backtracing
Skiena algorithm 2007 lecture15 backtracingSkiena algorithm 2007 lecture15 backtracing
Skiena algorithm 2007 lecture15 backtracingzukun
 
Introduction to julia
Introduction to juliaIntroduction to julia
Introduction to julia岳華 杜
 
20181212 - PGconfASIA - LT - English
20181212 - PGconfASIA - LT - English20181212 - PGconfASIA - LT - English
20181212 - PGconfASIA - LT - EnglishKohei KaiGai
 
Write Python for Speed
Write Python for SpeedWrite Python for Speed
Write Python for SpeedYung-Yu Chen
 
Testing in those hard to reach places
Testing in those hard to reach placesTesting in those hard to reach places
Testing in those hard to reach placesdn
 

Similaire à Introduction to Cache-Oblivious Algorithms (20)

Xgboost
XgboostXgboost
Xgboost
 
Algorithm analysis.pptx
Algorithm analysis.pptxAlgorithm analysis.pptx
Algorithm analysis.pptx
 
Scala to assembly
Scala to assemblyScala to assembly
Scala to assembly
 
ISCA Final Presentaiton - Compilations
ISCA Final Presentaiton -  CompilationsISCA Final Presentaiton -  Compilations
ISCA Final Presentaiton - Compilations
 
Apache Spark Internals - Part 2
Apache Spark Internals - Part 2Apache Spark Internals - Part 2
Apache Spark Internals - Part 2
 
COSCUP: Introduction to Julia
COSCUP: Introduction to JuliaCOSCUP: Introduction to Julia
COSCUP: Introduction to Julia
 
Task based Programming with OmpSs and its Application
Task based Programming with OmpSs and its ApplicationTask based Programming with OmpSs and its Application
Task based Programming with OmpSs and its Application
 
Quantization and Training of Neural Networks for Efficient Integer-Arithmetic...
Quantization and Training of Neural Networks for Efficient Integer-Arithmetic...Quantization and Training of Neural Networks for Efficient Integer-Arithmetic...
Quantization and Training of Neural Networks for Efficient Integer-Arithmetic...
 
Parallel R in snow (english after 2nd slide)
Parallel R in snow (english after 2nd slide)Parallel R in snow (english after 2nd slide)
Parallel R in snow (english after 2nd slide)
 
Deuce STM - CMP'09
Deuce STM - CMP'09Deuce STM - CMP'09
Deuce STM - CMP'09
 
The art of reverse engineering flash exploits
The art of reverse engineering flash exploitsThe art of reverse engineering flash exploits
The art of reverse engineering flash exploits
 
Lesson 39
Lesson 39Lesson 39
Lesson 39
 
AI Lesson 39
AI Lesson 39AI Lesson 39
AI Lesson 39
 
AA_Unit 1_part-I.pptx
AA_Unit 1_part-I.pptxAA_Unit 1_part-I.pptx
AA_Unit 1_part-I.pptx
 
Memory model
Memory modelMemory model
Memory model
 
Skiena algorithm 2007 lecture15 backtracing
Skiena algorithm 2007 lecture15 backtracingSkiena algorithm 2007 lecture15 backtracing
Skiena algorithm 2007 lecture15 backtracing
 
Introduction to julia
Introduction to juliaIntroduction to julia
Introduction to julia
 
20181212 - PGconfASIA - LT - English
20181212 - PGconfASIA - LT - English20181212 - PGconfASIA - LT - English
20181212 - PGconfASIA - LT - English
 
Write Python for Speed
Write Python for SpeedWrite Python for Speed
Write Python for Speed
 
Testing in those hard to reach places
Testing in those hard to reach placesTesting in those hard to reach places
Testing in those hard to reach places
 

Dernier

Six Myths about Ontologies: The Basics of Formal Ontology
Six Myths about Ontologies: The Basics of Formal OntologySix Myths about Ontologies: The Basics of Formal Ontology
Six Myths about Ontologies: The Basics of Formal Ontologyjohnbeverley2021
 
EMPOWERMENT TECHNOLOGY GRADE 11 QUARTER 2 REVIEWER
EMPOWERMENT TECHNOLOGY GRADE 11 QUARTER 2 REVIEWEREMPOWERMENT TECHNOLOGY GRADE 11 QUARTER 2 REVIEWER
EMPOWERMENT TECHNOLOGY GRADE 11 QUARTER 2 REVIEWERMadyBayot
 
Navigating the Deluge_ Dubai Floods and the Resilience of Dubai International...
Navigating the Deluge_ Dubai Floods and the Resilience of Dubai International...Navigating the Deluge_ Dubai Floods and the Resilience of Dubai International...
Navigating the Deluge_ Dubai Floods and the Resilience of Dubai International...Orbitshub
 
Introduction to Multilingual Retrieval Augmented Generation (RAG)
Introduction to Multilingual Retrieval Augmented Generation (RAG)Introduction to Multilingual Retrieval Augmented Generation (RAG)
Introduction to Multilingual Retrieval Augmented Generation (RAG)Zilliz
 
"I see eyes in my soup": How Delivery Hero implemented the safety system for ...
"I see eyes in my soup": How Delivery Hero implemented the safety system for ..."I see eyes in my soup": How Delivery Hero implemented the safety system for ...
"I see eyes in my soup": How Delivery Hero implemented the safety system for ...Zilliz
 
Web Form Automation for Bonterra Impact Management (fka Social Solutions Apri...
Web Form Automation for Bonterra Impact Management (fka Social Solutions Apri...Web Form Automation for Bonterra Impact Management (fka Social Solutions Apri...
Web Form Automation for Bonterra Impact Management (fka Social Solutions Apri...Jeffrey Haguewood
 
Cloud Frontiers: A Deep Dive into Serverless Spatial Data and FME
Cloud Frontiers:  A Deep Dive into Serverless Spatial Data and FMECloud Frontiers:  A Deep Dive into Serverless Spatial Data and FME
Cloud Frontiers: A Deep Dive into Serverless Spatial Data and FMESafe Software
 
Apidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, Adobe
Apidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, AdobeApidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, Adobe
Apidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, Adobeapidays
 
Polkadot JAM Slides - Token2049 - By Dr. Gavin Wood
Polkadot JAM Slides - Token2049 - By Dr. Gavin WoodPolkadot JAM Slides - Token2049 - By Dr. Gavin Wood
Polkadot JAM Slides - Token2049 - By Dr. Gavin WoodJuan lago vázquez
 
MINDCTI Revenue Release Quarter One 2024
MINDCTI Revenue Release Quarter One 2024MINDCTI Revenue Release Quarter One 2024
MINDCTI Revenue Release Quarter One 2024MIND CTI
 
Emergent Methods: Multi-lingual narrative tracking in the news - real-time ex...
Emergent Methods: Multi-lingual narrative tracking in the news - real-time ex...Emergent Methods: Multi-lingual narrative tracking in the news - real-time ex...
Emergent Methods: Multi-lingual narrative tracking in the news - real-time ex...Zilliz
 
Exploring Multimodal Embeddings with Milvus
Exploring Multimodal Embeddings with MilvusExploring Multimodal Embeddings with Milvus
Exploring Multimodal Embeddings with MilvusZilliz
 
ProductAnonymous-April2024-WinProductDiscovery-MelissaKlemke
ProductAnonymous-April2024-WinProductDiscovery-MelissaKlemkeProductAnonymous-April2024-WinProductDiscovery-MelissaKlemke
ProductAnonymous-April2024-WinProductDiscovery-MelissaKlemkeProduct Anonymous
 
WSO2's API Vision: Unifying Control, Empowering Developers
WSO2's API Vision: Unifying Control, Empowering DevelopersWSO2's API Vision: Unifying Control, Empowering Developers
WSO2's API Vision: Unifying Control, Empowering DevelopersWSO2
 
Cloud Frontiers: A Deep Dive into Serverless Spatial Data and FME
Cloud Frontiers:  A Deep Dive into Serverless Spatial Data and FMECloud Frontiers:  A Deep Dive into Serverless Spatial Data and FME
Cloud Frontiers: A Deep Dive into Serverless Spatial Data and FMESafe Software
 
CNIC Information System with Pakdata Cf In Pakistan
CNIC Information System with Pakdata Cf In PakistanCNIC Information System with Pakdata Cf In Pakistan
CNIC Information System with Pakdata Cf In Pakistandanishmna97
 
DBX First Quarter 2024 Investor Presentation
DBX First Quarter 2024 Investor PresentationDBX First Quarter 2024 Investor Presentation
DBX First Quarter 2024 Investor PresentationDropbox
 
Corporate and higher education May webinar.pptx
Corporate and higher education May webinar.pptxCorporate and higher education May webinar.pptx
Corporate and higher education May webinar.pptxRustici Software
 
Apidays New York 2024 - APIs in 2030: The Risk of Technological Sleepwalk by ...
Apidays New York 2024 - APIs in 2030: The Risk of Technological Sleepwalk by ...Apidays New York 2024 - APIs in 2030: The Risk of Technological Sleepwalk by ...
Apidays New York 2024 - APIs in 2030: The Risk of Technological Sleepwalk by ...apidays
 

Dernier (20)

Six Myths about Ontologies: The Basics of Formal Ontology
Six Myths about Ontologies: The Basics of Formal OntologySix Myths about Ontologies: The Basics of Formal Ontology
Six Myths about Ontologies: The Basics of Formal Ontology
 
EMPOWERMENT TECHNOLOGY GRADE 11 QUARTER 2 REVIEWER
EMPOWERMENT TECHNOLOGY GRADE 11 QUARTER 2 REVIEWEREMPOWERMENT TECHNOLOGY GRADE 11 QUARTER 2 REVIEWER
EMPOWERMENT TECHNOLOGY GRADE 11 QUARTER 2 REVIEWER
 
Navigating the Deluge_ Dubai Floods and the Resilience of Dubai International...
Navigating the Deluge_ Dubai Floods and the Resilience of Dubai International...Navigating the Deluge_ Dubai Floods and the Resilience of Dubai International...
Navigating the Deluge_ Dubai Floods and the Resilience of Dubai International...
 
Introduction to Multilingual Retrieval Augmented Generation (RAG)
Introduction to Multilingual Retrieval Augmented Generation (RAG)Introduction to Multilingual Retrieval Augmented Generation (RAG)
Introduction to Multilingual Retrieval Augmented Generation (RAG)
 
"I see eyes in my soup": How Delivery Hero implemented the safety system for ...
"I see eyes in my soup": How Delivery Hero implemented the safety system for ..."I see eyes in my soup": How Delivery Hero implemented the safety system for ...
"I see eyes in my soup": How Delivery Hero implemented the safety system for ...
 
Web Form Automation for Bonterra Impact Management (fka Social Solutions Apri...
Web Form Automation for Bonterra Impact Management (fka Social Solutions Apri...Web Form Automation for Bonterra Impact Management (fka Social Solutions Apri...
Web Form Automation for Bonterra Impact Management (fka Social Solutions Apri...
 
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
+971581248768>> SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHA...
 
Cloud Frontiers: A Deep Dive into Serverless Spatial Data and FME
Cloud Frontiers:  A Deep Dive into Serverless Spatial Data and FMECloud Frontiers:  A Deep Dive into Serverless Spatial Data and FME
Cloud Frontiers: A Deep Dive into Serverless Spatial Data and FME
 
Apidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, Adobe
Apidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, AdobeApidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, Adobe
Apidays New York 2024 - Scaling API-first by Ian Reasor and Radu Cotescu, Adobe
 
Polkadot JAM Slides - Token2049 - By Dr. Gavin Wood
Polkadot JAM Slides - Token2049 - By Dr. Gavin WoodPolkadot JAM Slides - Token2049 - By Dr. Gavin Wood
Polkadot JAM Slides - Token2049 - By Dr. Gavin Wood
 
MINDCTI Revenue Release Quarter One 2024
MINDCTI Revenue Release Quarter One 2024MINDCTI Revenue Release Quarter One 2024
MINDCTI Revenue Release Quarter One 2024
 
Emergent Methods: Multi-lingual narrative tracking in the news - real-time ex...
Emergent Methods: Multi-lingual narrative tracking in the news - real-time ex...Emergent Methods: Multi-lingual narrative tracking in the news - real-time ex...
Emergent Methods: Multi-lingual narrative tracking in the news - real-time ex...
 
Exploring Multimodal Embeddings with Milvus
Exploring Multimodal Embeddings with MilvusExploring Multimodal Embeddings with Milvus
Exploring Multimodal Embeddings with Milvus
 
ProductAnonymous-April2024-WinProductDiscovery-MelissaKlemke
ProductAnonymous-April2024-WinProductDiscovery-MelissaKlemkeProductAnonymous-April2024-WinProductDiscovery-MelissaKlemke
ProductAnonymous-April2024-WinProductDiscovery-MelissaKlemke
 
WSO2's API Vision: Unifying Control, Empowering Developers
WSO2's API Vision: Unifying Control, Empowering DevelopersWSO2's API Vision: Unifying Control, Empowering Developers
WSO2's API Vision: Unifying Control, Empowering Developers
 
Cloud Frontiers: A Deep Dive into Serverless Spatial Data and FME
Cloud Frontiers:  A Deep Dive into Serverless Spatial Data and FMECloud Frontiers:  A Deep Dive into Serverless Spatial Data and FME
Cloud Frontiers: A Deep Dive into Serverless Spatial Data and FME
 
CNIC Information System with Pakdata Cf In Pakistan
CNIC Information System with Pakdata Cf In PakistanCNIC Information System with Pakdata Cf In Pakistan
CNIC Information System with Pakdata Cf In Pakistan
 
DBX First Quarter 2024 Investor Presentation
DBX First Quarter 2024 Investor PresentationDBX First Quarter 2024 Investor Presentation
DBX First Quarter 2024 Investor Presentation
 
Corporate and higher education May webinar.pptx
Corporate and higher education May webinar.pptxCorporate and higher education May webinar.pptx
Corporate and higher education May webinar.pptx
 
Apidays New York 2024 - APIs in 2030: The Risk of Technological Sleepwalk by ...
Apidays New York 2024 - APIs in 2030: The Risk of Technological Sleepwalk by ...Apidays New York 2024 - APIs in 2030: The Risk of Technological Sleepwalk by ...
Apidays New York 2024 - APIs in 2030: The Risk of Technological Sleepwalk by ...
 

Introduction to Cache-Oblivious Algorithms

  • 1. Introduction To Cache-Oblivious Algorithms by Christopher Gilbert http://www.twitter.com/bigdatadev http://www.github.com/bigdatadev   http://www.cjgilbert.me
  • 2. “A cache-oblivious algorithm is not oblivious to cache memory” (However, it is oblivious to the size of the cache)
  • 3. “Cache-oblivious algorithms are effective on any system, regardless of memory hierarchy”
  • 4. “Cache oblivious algorithms do not improve complexity.” (But they still improve performance)
  • 5. Processor Graphics Core Core Core Core Shared L3 Cache Memory Controller I/O L1 Cache L2 Cache L1 Cache L2 Cache L1 Cache L2 Cache L1 Cache L2 Cache System Agent & Memory Controller
  • 6. Register L1 Cache L2 Cache L3 Cache Memory Disk LatencyIncreases CapacityDecreases
  • 10. MT(N) = (N/B) Estimating Memory Transfers
  • 11. struct item_t { uint64_t x; uint64_t y; }; bool predicate(item_t const* i1, item_t const* i2) { return ((i1->x + i2->y) * i2->x == (i1->y + i2->x) * i2->y); } size_t kernel(item_t const* begin1, item_t const* end1, item_t const* begin2, item_t const* end2) { size_t count = 0; for (item_t const* pos1 = begin1; pos1 != end1; pos1++) { for (item_t const* pos2 = begin2; pos2 != end2; pos2++) { if (predicate(pos1, pos2)) count += 1; } } return count; } size_t simple_parallel(item_t const* begin, size_t count, unsigned thread_count) { size_t res = 0; #pragma omp parallel for reduction(+:res) num_threads(thread_count) for (int i = 0; i < (int)count; i += 1) res += kernel(begin + i, begin + i + 1, begin, begin + count); return res; }
  • 15. class coba_task : public task { public: task* execute() { if (_count1 + _count2 > 256) { coba_task& a = *new(allocate_child()) coba_task( _begin1, _count1 / 2, _begin2, _count2 / 2 ); coba_task& b = *new(allocate_child()) coba_task( _begin1, _count1 / 2, _begin2 + _count2 / 2, _count2 - _count2 / 2 ); coba_task& c = *new(allocate_child()) coba_task( _begin1 + _count1 / 2, _count1 - _count1 / 2, _begin2 + _count2 / 2, _count2 - _count2 / 2 ); coba_task& d = *new(allocate_child()) coba_task( _begin1 + _count1 / 2, _count1 - _count1 / 2, _begin2, _count2 / 2 ); set_ref_count(5); spawn(b); spawn(c); spawn(d); spawn_and_wait_for_all(a); _result = a.result() + b.result() + c.result() + d.result(); } else { _result = kernel(_begin1, _begin1 + _count1, _begin2, _begin2 + _count2); } return NULL; } }; size_t recursive_parallel(item_t const* begin, size_t count, unsigned thread_count) { coba_task& a = *new(task::allocate_root()) coba_task( begin, count / 2, begin + count / 2, count / 2 ); task::spawn_root_and_wait(a); return a.result(); }
  • 16. Revised Memory Transfer Estimate MT(N) = (N2 /CB)
  • 18. Exploit both spatial and temporal locality.
  • 20. Optimise your memory transfers.