10. int SIZE = 1000000;
int NB_ARRAY = 50;
long[][] longs
= new long[NB_ARRAY][SIZE];
long result = 0;
for (int j=0; j<SIZE; j++)
for (int i=0; i<NB_ARRAY; i++)
result += longs[i][j];
int SIZE = 1000000;
int NB_ARRAY = 50;
long[][] longs
= new long[NB_ARRAY][SIZE];
long result = 0;
for (int i=0; i<NB_ARRAY; i++)
for (int j=0; j<SIZE; j++)
result += longs[i][j];
Qui est le plus rapide ?
1 2
13. int SIZE = 1000000;
int NB_ARRAY = 50;
long[][] longs
= new long[NB_ARRAY][SIZE];
long result = 0;
for (int i=0; i<NB_ARRAY; i++)
for (int j=0; j<SIZE; j++)
result += longs[i][j];
Friendly
14. int SIZE = 1000000;
int NB_ARRAY = 50;
long[][] longs
= new long[NB_ARRAY][SIZE];
long result = 0;
for (int j = 0; j<SIZE;j++)
for (int i=0; i<NB_ARRAY; i++)
result += longs[i][j];
Pas friendly du tout
25. Random rand = new Random();
IntStream stream = rand.ints(10 * 1024 * 1024, 0, 2);
26. Random rand = new Random();
IntStream stream = rand.ints(10 * 1024 * 1024, 0, 2);
int sum = stream.sum();
27. Random rand = new Random();
IntStream stream = rand.ints(10 * 1024 * 1024, 0, 2);
int sum = stream.sum();
int sum = stream.parallel().sum();
28. Random rand = new Random();
IntStream stream = rand.ints(10 * 1024 * 1024, 0, 2);
int sum = stream.sum();
int sum = stream.parallel().sum();
113 ms
29. Random rand = new Random();
IntStream stream = rand.ints(10 * 1024 * 1024, 0, 2);
int sum = stream.sum();
int sum = stream.parallel().sum(); 1167 ms
157 ms
50. producerIndex = 42
Concurrent Reading and Writing, Leslie Lamport, 1977
E E null null null null null null null E
offset = 2
consumerIndex = 39 offset = 9
Lamport Queue
51. producerIndex = 42
offset = 2
consumerIndex = 39
offset = 9
Lamport Queue
nullnull
E
E
E
nullnull
null
nullnull
Concurrent Reading and Writing, Leslie Lamport, 1977
52. import java.util.AbstractQueue;
public final class LamportQueue1<E> extends AbstractQueue<E> {
private final E[] buffer;
private volatile long producerIndex = 0;
private volatile long consumerIndex = 0;
/**
 * Creates a queue backed by a fixed-size array.
 *
 * @param capacity number of slots allocated for the backing array
 */
// Generic arrays cannot be instantiated directly, hence the Object[] cast.
// The array is private and, in the code shown, only receives E values via offer().
@SuppressWarnings("unchecked")
public LamportQueue1(int capacity) {
    buffer = (E[]) new Object[capacity];
}
@Override
public int size() {
return (int) (producerIndex -‐ consumerIndex);
}
53. @Override
public boolean offer(final E e) {
// Enqueues e at the producer position; returns false without storing anything when the buffer is full.
if (size() == buffer.length) {
return false;
}
// The index only grows, so a modulo maps it onto a slot of the circular buffer.
final int offset = (int)(producerIndex % buffer.length);
// Store the element first, then advance the volatile producerIndex.
buffer[offset] = e;
// NOTE(review): ++ on a volatile is a separate read and write; presumably only one producer thread calls offer() - confirm.
producerIndex++;
return true;
}
54. @Override
public boolean offer(final E e) {
// Enqueues e at the producer position; returns false without storing anything when the buffer is full.
if (size() == buffer.length) {
return false;
}
// The index only grows, so a modulo maps it onto a slot of the circular buffer.
final int offset = (int)(producerIndex % buffer.length);
// Store the element first, then advance the volatile producerIndex.
buffer[offset] = e;
// NOTE(review): ++ on a volatile is a separate read and write; presumably only one producer thread calls offer() - confirm.
producerIndex++;
return true;
}
55. @Override
public boolean offer(final E e) {
// Enqueues e at the producer position; returns false without storing anything when the buffer is full.
if (size() == buffer.length) {
return false;
}
// The index only grows, so a modulo maps it onto a slot of the circular buffer.
final int offset = (int)(producerIndex % buffer.length);
// Store the element first, then advance the volatile producerIndex.
buffer[offset] = e;
// NOTE(review): ++ on a volatile is a separate read and write; presumably only one producer thread calls offer() - confirm.
producerIndex++;
return true;
}
56. @Override
public boolean offer(final E e) {
// Enqueues e at the producer position; returns false without storing anything when the buffer is full.
if (size() == buffer.length) {
return false;
}
// The index only grows, so a modulo maps it onto a slot of the circular buffer.
final int offset = (int)(producerIndex % buffer.length);
// Store the element first, then advance the volatile producerIndex.
buffer[offset] = e;
// NOTE(review): ++ on a volatile is a separate read and write; presumably only one producer thread calls offer() - confirm.
producerIndex++;
return true;
}
57. @Override
public E poll() {
// Dequeues the element at the consumer position; returns null when both indexes are equal (nothing left to read).
if (consumerIndex == producerIndex) {
return null;
}
// Map the consumer index onto a slot of the circular buffer.
final int offset = (int)(consumerIndex % buffer.length);
final E e = buffer[offset];
// Clear the slot so the buffer no longer references the returned element.
buffer[offset] = null;
// NOTE(review): ++ on a volatile is a separate read and write; presumably only one consumer thread calls poll() - confirm.
consumerIndex++;
return e;
}
58. @Override
public E poll() {
// Dequeues the element at the consumer position; returns null when both indexes are equal (nothing left to read).
if (consumerIndex == producerIndex) {
return null;
}
// Map the consumer index onto a slot of the circular buffer.
final int offset = (int)(consumerIndex % buffer.length);
final E e = buffer[offset];
// Clear the slot so the buffer no longer references the returned element.
buffer[offset] = null;
// NOTE(review): ++ on a volatile is a separate read and write; presumably only one consumer thread calls poll() - confirm.
consumerIndex++;
return e;
}
59. @Override
public E poll() {
// Dequeues the element at the consumer position; returns null when both indexes are equal (nothing left to read).
if (consumerIndex == producerIndex) {
return null;
}
// Map the consumer index onto a slot of the circular buffer.
final int offset = (int)(consumerIndex % buffer.length);
final E e = buffer[offset];
// Clear the slot so the buffer no longer references the returned element.
buffer[offset] = null;
// NOTE(review): ++ on a volatile is a separate read and write; presumably only one consumer thread calls poll() - confirm.
consumerIndex++;
return e;
}
60. @Override
public E poll() {
// Dequeues the element at the consumer position; returns null when both indexes are equal (nothing left to read).
if (consumerIndex == producerIndex) {
return null;
}
// Map the consumer index onto a slot of the circular buffer.
final int offset = (int)(consumerIndex % buffer.length);
final E e = buffer[offset];
// Clear the slot so the buffer no longer references the returned element.
buffer[offset] = null;
// NOTE(review): ++ on a volatile is a separate read and write; presumably only one consumer thread calls poll() - confirm.
consumerIndex++;
return e;
}
61. @Override
public E poll() {
// Dequeues the element at the consumer position; returns null when both indexes are equal (nothing left to read).
if (consumerIndex == producerIndex) {
return null;
}
// Map the consumer index onto a slot of the circular buffer.
final int offset = (int)(consumerIndex % buffer.length);
final E e = buffer[offset];
// Clear the slot so the buffer no longer references the returned element.
buffer[offset] = null;
// NOTE(review): ++ on a volatile is a separate read and write; presumably only one consumer thread calls poll() - confirm.
consumerIndex++;
return e;
}
62. @Override
public E poll() {
// Dequeues the element at the consumer position; returns null when both indexes are equal (nothing left to read).
if (consumerIndex == producerIndex) {
return null;
}
// Map the consumer index onto a slot of the circular buffer.
final int offset = (int)(consumerIndex % buffer.length);
final E e = buffer[offset];
// Clear the slot so the buffer no longer references the returned element.
buffer[offset] = null;
// NOTE(review): ++ on a volatile is a separate read and write; presumably only one consumer thread calls poll() - confirm.
consumerIndex++;
return e;
}
65. private final E[] buffer;
private volatile long producerIndex = 0;
private volatile long consumerIndex = 0;
66. @Override
public boolean offer(final E e) {
// Enqueues e at the producer position; returns false without storing anything when the buffer is full.
if (size() == buffer.length) {
// queue is full
return false;
}
// The index only grows, so a modulo maps it onto a slot of the circular buffer.
final int offset =
(int)(producerIndex
% buffer.length);
// Store the element first, then advance the volatile producerIndex.
buffer[offset] = e;
// NOTE(review): ++ on a volatile is a separate read and write; presumably only the producer thread calls offer() - confirm.
producerIndex++;
return true;
}
/**
 * Removes and returns the element at the consumer position.
 *
 * @return the dequeued element, or {@code null} when the consumer and producer indexes are equal
 */
@Override
public E poll() {
if (consumerIndex == producerIndex) {
// queue is empty
return null;
}
// Map the consumer index onto a slot of the circular buffer.
final int offset = (int)(consumerIndex
% buffer.length);
final E e = buffer[offset];
// Clear the slot so the buffer no longer references the returned element.
buffer[offset] = null;
// NOTE(review): ++ on a volatile is a separate read and write; presumably only the consumer thread calls poll() - confirm.
consumerIndex++;
return e;
}
Producer Thread Consumer Thread
77. Lequel est le plus rapide ?
public static final int SIZE = 256 * 1024;
private int[] data = new int[SIZE];
@Setup
public void init() {
Random rand = new Random();
for (int i = 0; i < SIZE; i++) {
data[i] = rand.nextInt(100) -‐ 50;
}
}
78. @Benchmark
public int mathAbs() {
// Benchmark variant 1: sums the absolute values of data using Math.abs.
int sum = 0;
for (int x : data) {
sum += Math.abs(x);
}
// The sum is returned rather than discarded.
return sum;
}
@Benchmark
public int customAbs() {
int sum = 0;
for (int x : data) {
if (x < 0) {
sum -‐= x;
} else {
sum += x;
}
}
return sum;
}
1 2
Lequel est le plus rapide ?
104. import java.util.AbstractQueue;
import java.util.concurrent.atomic.AtomicLong;
public final class LamportQueue2<E> extends AbstractQueue<E> {
private final E[] buffer;
private final AtomicLong producerIndex = new AtomicLong();
private final AtomicLong consumerIndex = new AtomicLong();
/**
 * Creates a queue backed by a fixed-size array.
 *
 * @param capacity number of slots allocated for the backing array
 */
// Generic arrays cannot be instantiated directly, hence the Object[] cast.
// The array is private and, in the code shown, only receives E values via offer().
@SuppressWarnings("unchecked")
public LamportQueue2(int capacity) {
    buffer = (E[]) new Object[capacity];
}
@Override
public int size() {
return (int) (producerIndex.get() -‐ consumerIndex.get());
}
105. @Override
public boolean offer(final E e) {
    // Enqueues e at the producer position; returns false without storing anything when the buffer is full.
    if (size() == buffer.length) {
        return false;
    }
    // producerIndex is an AtomicLong, so its value must be read with get();
    // read it once and reuse it for both the slot computation and the update.
    final long currentProducerIndex = producerIndex.get();
    final int offset = (int) (currentProducerIndex % buffer.length);
    // Store the element first, then publish the new index.
    buffer[offset] = e;
    // NOTE(review): a plain get()+lazySet() is not an atomic increment; presumably only one producer thread calls offer() - confirm.
    producerIndex.lazySet(currentProducerIndex + 1);
    return true;
}
106. @Override
public E poll() {
    // Dequeues the element at the consumer position; returns null when nothing is left to read.
    final long currentConsumerIndex = consumerIndex.get();
    // Compare the index VALUES: the two AtomicLong fields are distinct objects,
    // so a reference comparison would never detect the empty queue.
    if (currentConsumerIndex == producerIndex.get()) {
        return null;
    }
    final int offset = (int) (currentConsumerIndex % buffer.length);
    final E e = buffer[offset];
    // Clear the slot so the buffer no longer references the returned element.
    buffer[offset] = null;
    // NOTE(review): a plain get()+lazySet() is not an atomic increment; presumably only one consumer thread calls poll() - confirm.
    consumerIndex.lazySet(currentConsumerIndex + 1);
    return e;
}
119. <configuration>
<appender name = "TXT-CHRONICLE"
class = "net.openhft.chronicle.logger.logback.TextIndexedChronicleAppender">
<path>perf-test-txt-chronicle</path>
</appender>
</configuration>
Chronicle Logback Appender
120. Texte vs Binaire – données de marché
• Code ISIN sur 12 caractères
• Un prix d’achat (bid)
• Un prix de vente (ask)
Isin=FR0000120271 Bid=46.575 Ask=46.590
121. Texte vs Binaire
Isin=FR0000120271 Bid=46.575 Ask=46.590
Texte (UTF-8)
49 73 69 6E 3D 46 52 30
30 30 30 31 32 30 32 37
31 20 42 69 64 3D 34 36
2E 35 37 35 20 41 73 6B
3D 34 36 2E 35 39 30
39 octets
Binaire
46 52 30 30 30 30 31 32
30 32 37 31 40 47 49 99
99 99 99 9A 40 47 4B 85
1E B8 51 EC
28 octets (20 avec des int)
123. Serialization vs toString()
// Market-data quote used in the text-vs-binary comparison: an instrument code plus bid and ask prices.
public class Quote {
// ISIN code of the instrument (12 characters according to the slide).
String code;
// Purchase price (bid).
double bid;
// Selling price (ask).
double ask;
}
// Fixed 28-byte record: 12 bytes of ISIN code + 8-byte bid + 8-byte ask.
ByteBuffer buffer = ByteBuffer.allocate(28);
// Encode with an explicit single-byte charset instead of the platform default,
// so a 12-character code always occupies exactly 12 bytes on every JVM.
buffer.put(code.getBytes(java.nio.charset.StandardCharsets.US_ASCII));
buffer.putDouble(bid);
buffer.putDouble(ask);
// Switch the buffer from writing to reading before handing its content over.
buffer.flip();
out.write(buffer.array());
124. <appender name = "TXT-CHRONICLE"
class = "net.openhft.chronicle.logger.logback.TextIndexedChronicleAppender">
<path>perf-test-txt-chronicle</path>
</appender>
<appender name = "BIN-CHRONICLE"
class = "net.openhft.chronicle.logger.logback.BinaryIndexedChronicleAppender">
<path>perf-test-bin-chronicle</path>
</appender>
Logger en binaire avec Chronicle
126. Bench logger
• Log de 100 000 messages de cotation
• Mesure de la latence induite par l’appel au logger
• System Under Test
CPU : 2x (Xeon E5-2630, 2.3GHz – 2.8 GHz, 6 cores)
RAM : 64 GB DDR3 PC10600
HDD : 2x 300GB en RAID1, 10K tr/min, SAS 6 Gb/s, 1 GB cache
Network 1GbE
OS: RHEL 6.4 avec un kernel 2.6.32
129. Dernier Quiz : ce qu’il faut retenir…
C’est toujours le premier programme le plus rapide
Il faut se méfier de son intuition : en matière de perf, il faut mesurer !!!
Soyez curieux, le hardware ce n’est pas sale
130. DÉCOUVREZ TOUTES NOS OFFRES SUR
C A R E E R S . S O C I E T E G E N E R A L E . C O M
132. @Alex_Victoor @ThierryAbalea #sginsideit
Références
Talk “Lock-free Algorithms for Ultimate Performance”, Martin Thompson
https://yow.eventer.com/yow-2012-1012/lock-free-algorithms-for-ultimate-performance-by-martin-thompson-1250
Talk “Queue evolution: from 10M to 470M ops/sec“, Nitsan Wakart
https://vimeo.com/100197431 / https://github.com/nitsanw/QueueEvolution
Talk “How NOT to Measure Latency”, Gil Tene
http://www.infoq.com/presentations/latency-pitfalls
JMH, THE Micro Benchmark Tool for Java http://openjdk.java.net/projects/code-tools/jmh/
HdrHistogram, THE Latency Measurement & Plotting Tool
https://github.com/HdrHistogram/HdrHistogram
Blog posts related to Mechanical Sympathy, Martin Thompson
http://mechanical-sympathy.blogspot.com/
Blog posts related to Java Performance, Nitsan Wakart http://psy-lob-saw.blogspot.com/
Blog posts related to Java Performance, Peter Lawrey http://vanillajava.blogspot.com/
Chronicle, OpenHFT’s Tools http://openhft.net/
The “Mechanical Sympathy” Forum https://groups.google.com/forum/#!forum/mechanical-sympathy
Code de cette présentation https://github.com/ThierryAbalea/high-performance-2015-talk
Slides de cette présentation http://www.slideshare.net/ThierryAbalea/un-monde-o-1-ms-vaut-100-m-devoxx-france-2015
slideshare