SlideShare une entreprise Scribd logo
1  sur  45
How to add an optimization for C#
to JIT compiler
@EgorBo
Engineer at Microsoft
RyuJIT
From C# to machine code
2
C# IL IR ASM
float y = x / 2;
float y = x * 0.5f;
Let’s start with a simple optimization
3
vdivss (Latency: 20, R.Throughput: 14)
vmulss (Latency: 5, R.Throughput: 0.5)
^ for my MacBook (Haswell)
X / C
4
double y = x / 2;
float y = x / 2;
double y = x / 10;
double y = x / 8;
double y = x / -0.5;
float x = 48.665f;
Console.WriteLine(x / 10f); // 4.8665
Console.WriteLine(x * 0.1f); // 4.8665004
= x * 0.5
= x * 0.5f
= x * 0.1
= x * 0.125
= x * -2
X / C – let me optimize it in Roslyn!
5
static float GetSpeed(float distance, float time)
{
return distance / time;
}
...
float speed = GetSpeed(distance, 2);
Does Roslyn see “X/C” here?
NO! It doesn’t inline methods
Where to implement my custom optimization?
6
• Roslyn
+ No time constraints
+ It’s written in C# - easy to add optimizations, easy to debug and experiment
- No cross-assembly optimizations
- No CPU-dependent optimizations (IL is cross-platform)
- Doesn’t know what the code will look like after inlining, CSE, loop optimizations, etc.
- F# doesn’t use Roslyn
• JIT
+ Inlining, CSE, Loop opts, etc phases create more opportunities for optimizations
+ Knows everything about target platform, CPU capabilities
- Written in C++, difficult to experiment
- Time constraints for optimizations (probably not that important with Tiering)
• R2R (AOT)
+ No time constraints (some optimizations are really time consuming, e.g. full escape analysis)
- No CPU-dependent optimizations
- Will be most likely re-jitted anyway?
• ILLink Custom Step
+ Cross-assembly IL optimizations
+ Written in C#
+ We can manually de-virtualize types/methods/calls (if we know what we are doing)
- Still no inlining, CSE, etc..
RyuJIT: phases
7
RyuJIT: where to morph my X/C?
8
GenTree
GenTreeStmt
GenTree
(GenTreeOp)
BasicBlock
GenTree
(GenTreeDblCon)
GenTree
(GenTreeDblCon)
GenTree
(GenTreeCall)
GenTree
(GenTreeLclVar)
static float Test(float x)
{
return Foo(x, 3.14) * 2;
}
Compiler
MUL
2.0
3.14x
Foo
Dump IR via COMPlus_JitDump
Back to X/C: Morph IR Tree
static float Calculate(float distance, float v0)
{
return distance / 2 + v0;
}
ADD
DIV
LCL_VAR
(v0)
CNS_DBL
(2.0)
LCL_VAR
(distance)
ADD
MUL
LCL_VAR
(v0)
CNS_DBL
(0.5)
LCL_VAR
(distance)
Morph
Implementing the opt in morph.cpp
DIV
op2op1
Inspired by GT_ROL
13
/// <summary>
/// Rotates the specified value left by the specified number of bits.
/// </summary>
public static uint RotateLeft(uint value, int offset)
=> (value << offset) | (value >> (32 - offset));
OR ROL
/  / 
/  / 
LSH RSZ => x y
/  / 
x AND x AND
/  / 
y 31 ADD 31
/ 
NEG 32
|
y
rol eax, cl
15
"Hello".Length -> 5
static void Append(string str)
{
if (str.Length <= 2)
QuickAppend(str);
else
SlowAppend(str);
}
...
builder.Append("/>");
builder.QuickAppend("/>");
Inline, remove if (2 <= 2)
16
"Hello".Length => 5
ARR_LENGTH
CNS_STR
(ScpHnd, SconCPX)
CNS_INT
(5)
VM
case GT_ARR_LENGTH:
{
if (op1->OperIs(GT_CNS_STR))
{
GenTreeStrCon* strCon = op1->AsStrCon();
int len = info.compCompHnd->getStringLength(
strCon->gtScpHnd, strCon->gtSconCPX);
return gtNewIconNode(len);
}
break;
}
JIT <-> VM
Interface
op1
Access VM’s data from JIT
17
bool Test1(int x) => x % 4 == 0; bool Test1(int x) => (x & 3) == 0;
MOD
EQ/NE
CNS_INT
(0)
CNS_INT
(4)
anything
op1 op2
op2op1
AND
EQ/NE
CNS_INT
(0)
CNS_INT
(3)
anything
op1 op2
op2op1
Roslyn and “!=“ operator
(unexpected optimization)
18
public static bool Test1(int x) => x != 42;
public static bool Test2(int x) => x != 0;
IL_0000: ldarg.0
IL_0001: ldc.i4.42
IL_0002: ceq
IL_0004: ldc.i4.0
IL_0005: ceq
IL_0007: ret
IL_0000: ldarg.0
IL_0001: ldc.i4.0
IL_0002: cgt.un
IL_0004: ret
return (uint)x > 0
return (x == 42) == false
JIT: ok, it’s GT_NE
JIT: what?.. ok, GT_GT
19
Math.Pow(x, 2)
Math.Pow(x, 1)
Math.Pow(x, 4)
Math.Pow(x, -1)
// can be added:
Math.Pow(42, 3)
Math.Pow(1, x)
Math.Pow(2, x)
Math.Pow(x, 0)
Math.Pow(x, 0.5)
| x * x
| x
| x * x * x * x
| 1 / x
| 74088
| 1
| exp2(x)
| 1
| sqrt(x)
Range check elimination
20
Range check elimination
21
public static void Test(int[] a)
{
a[0] = 4;
a[1] = 2;
for (int i = 0; i < a.Length; i++)
{
a[i] = 0;
}
a[1] = 2;
}
Range check elimination
22
public static void Test(int[] a)
{
if (a.Length <= 0) throw new IndexOutOfRangeException();
a[0] = 4;
if (a.Length <= 1) throw new IndexOutOfRangeException();
a[1] = 2;
for (int i = 0; i < a.Length; i++)
{
if (a.Length <= i) throw new IndexOutOfRangeException();
a[i] = 0;
}
if (a.Length <= 1) throw new IndexOutOfRangeException();
a[1] = 2;
}
Range check elimination
23
public static void Test(int[] a)
{
if (a.Length <= 0) throw new IndexOutOfRangeException();
a[0] = 4;
if (a.Length <= 1) throw new IndexOutOfRangeException();
a[1] = 2;
for (int i = 0; i < a.Length; i++)
{
if (a.Length <= i) throw new IndexOutOfRangeException();
a[i] = 0;
}
if (a.Length <= 1) throw new IndexOutOfRangeException();
a[1] = 2;
}
Range check elimination
24
public static void Test(int[] a)
{
if (a.Length <= 1) throw new IndexOutOfRangeException();
a[1] = 2;
if (a.Length <= 1) throw new IndexOutOfRangeException();
a[0] = 4;
for (int i = 0; i < a.Length; i++)
{
if (a.Length <= i) throw new IndexOutOfRangeException();
a[i] = 0;
}
if (a.Length <= 1) throw new IndexOutOfRangeException();
a[1] = 2;
}
Range check elimination
25
public static void Test(int[] a)
{
if (a.Length <= 1) throw new IndexOutOfRangeException();
a[1] = 2;
a[0] = 4;
for (int i = 0; i < a.Length; i++)
{
a[i] = 0;
}
a[1] = 2;
}
rangecheck.cpp (simplified)
26
void RangeCheck::OptimizeRangeCheck(GenTreeBoundsChk* bndsChk)
{
// Get the range for this index.
Range range = GetRange(...);
// If upper or lower limit is unknown, then return.
if (range.UpperLimit().IsUnknown() || range.LowerLimit().IsUnknown())
{
return;
}
// Is the range between the lower and upper bound values.
if (BetweenBounds(range, 0, bndsChk->gtArrLen))
{
m_pCompiler->optRemoveRangeCheck(treeParent, stmt);
}
return;
}
rangecheck.cpp (simplified)
27
void RangeCheck::OptimizeRangeCheck(GenTreeBoundsChk* bndsChk)
{
// Get the range for this index.
Range range = GetRange(...);
// If upper or lower limit is unknown, then return.
if (range.UpperLimit().IsUnknown() || range.LowerLimit().IsUnknown())
{
return;
}
// Is the range between the lower and upper bound values.
if (BetweenBounds(range, 0, bndsChk->gtArrLen))
{
m_pCompiler->optRemoveRangeCheck(treeParent, stmt);
}
return;
}
rangecheck.cpp (simplified)
28
void RangeCheck::OptimizeRangeCheck(GenTreeBoundsChk* bndsChk)
{
// Get the range for this index.
Range range = GetRange(...);
// If upper or lower limit is unknown, then return.
if (range.UpperLimit().IsUnknown() || range.LowerLimit().IsUnknown())
{
return;
}
// Is the range between the lower and upper bound values.
if (BetweenBounds(range, 0, bndsChk->gtArrLen))
{
m_pCompiler->optRemoveRangeCheck(treeParent, stmt);
}
return;
}
Byte array
29
private static readonly byte[] _data =
new byte[256] { 1, 2, 3, … };
public static byte GetByte(int i)
{
return _data[i];
}
Byte array: Roslyn hack (new feature)
30
private static ReadOnlySpan<byte> _data =>
new byte[256] { 1, 2, 3, … };
public static byte GetByte(int i)
{
return _data[i];
}
256
Byte array: byte index (my PR)
31
private static ReadOnlySpan<byte> _data =>
new byte[256] { 1, 2, 3, … };
public static byte GetByte(int i)
{
return _data[(byte)i];
}
Byte indexer will never go out of bounds!
rangecheck.cpp (simplified)
32
void RangeCheck::OptimizeRangeCheck(GenTreeBoundsChk* bndsChk)
{
// Get the range for this index.
Range range = GetRange(...);
// If upper or lower limit is unknown, then return.
if (range.UpperLimit().IsUnknown() || range.LowerLimit().IsUnknown())
{
return;
}
// Is the range between the lower and upper bound values.
if (BetweenBounds(range, 0, bndsChk->gtArrLen))
{
m_pCompiler->optRemoveRangeCheck(treeParent, stmt);
}
return;
}
[Byte.MinValue ... Byte.MaxValue]
ArrLen = 256
Loop optimizations
33
Loop Invariant Code Hoisting
34
public static bool Test(int[] a, int c)
{
for (int i = 0; i < a.Length; i++)
{
if (a[i] == c + 44)
return false;
}
return true;
}
Loop Invariant Code Hoisting
35
public static bool Test(int[] a, int c)
{
int tmp = c + 44;
for (int i = 0; i < a.Length; i++)
{
if (a[i] == tmp)
return false;
}
return true;
}
NYI: Loop-unrolling
36
public static int Test(int[] a)
{
int sum = 0;
for (int i = 0; i < a.Length; i++)
{
sum += a[i];
}
return sum;
}
NYI: Loop-unrolling
37
public static int Test(int[] a)
{
int sum = 0;
for (int i = 0; i < a.Length - 3; i += 4)
{
sum += a[i];
sum += a[i+1];
sum += a[i+2];
sum += a[i+3];
}
return sum;
}
NYI: Loop-unswitch
38
public static int Test(int[] a, bool condition)
{
int agr = 0;
for (int i = 0; i < a.Length; i++)
{
if (condition)
agr += a[i];
else
agr *= a[i];
}
return agr;
}
NYI: Loop-unswitch
39
public static int Test (int[] a, bool condition)
{
int agr = 0;
if (condition)
for (int i = 0; i < a.Length; i++)
agr += a[i];
else
for (int i = 0; i < a.Length; i++)
agr *= a[i];
return agr;
}
NYI: Loop-deletion
40
public static void Test()
{
for (int i = 0; i < 10; i++) { }
}
; Method Program:Test()
G_M44187_IG01:
G_M44187_IG02:
xor eax, eax
G_M44187_IG03:
inc eax
cmp eax, 10
jl SHORT G_M44187_IG03
G_M44187_IG04:
ret
; Total bytes of code: 10
NYI: Loop-deletion
41
public static void Test()
{
}
; Method Program:DeadLoop()
ret
; Total bytes of code: 1
NYI: InductiveRangeCheckElimination
42
public static void Zero1000Elements(int[] array)
{
for (int i = 0; i < 1000; i++)
array[i] = 0; // bound checks will be inserted here
}
NYI: InductiveRangeCheckElimination
43
public static void Zero1000Elements(int[] array)
{
int limit = Math.Min(array.Length, 1000);
for (int i = 0; i < limit; i++)
array[i] = 0; // bound checks are not needed here!
for (int i = limit; i < 1000; i++)
array[i] = 0; // bound checks are needed here
// so at least we could "zero" first `limit` elements without bound checks
}
NOTE: this LLVM optimization pass is not enabled by default in `opt -O2`.
Contributed by Azul developers (LLVM for JVM)
NYI: InductiveRangeCheckElimination
44
public static void Zero1000Elements(int[] array)
{
int limit = Math.Min(array.Length, 1000);
for (int i = 0; i < limit - 3; i += 4)
{
array[i] = 0;
array[i+1] = 0;
array[i+2] = 0;
array[i+3] = 0;
}
for (int i = limit; i < 1000; i++)
array[i] = 0; // bound checks are needed here
// so at least we could "zero" first `limit` elements without bound checks
}
Now we can even unroll the first loop!
NYI: InductiveRangeCheckElimination
45
public static void Zero1000Elements(int[] array)
{
int limit = Math.Min(array.Length, 1000);
memset(array, 0, limit);
for (int i = limit; i < 1000; i++)
array[i] = 0; // bound checks are needed here
// so at least we could "zero" first `limit` elements without bound checks
}
Or just replace with memset call
Q&A
Twitter: EgorBo

Contenu connexe

Tendances

Implementing jsp tag extensions
Implementing jsp tag extensionsImplementing jsp tag extensions
Implementing jsp tag extensionsSoujanya V
 
java memory management & gc
java memory management & gcjava memory management & gc
java memory management & gcexsuns
 
How to -- Goolge colab
How to -- Goolge colabHow to -- Goolge colab
How to -- Goolge colab艾鍗科技
 
ZIO Schedule: Conquering Flakiness & Recurrence with Pure Functional Programming
ZIO Schedule: Conquering Flakiness & Recurrence with Pure Functional ProgrammingZIO Schedule: Conquering Flakiness & Recurrence with Pure Functional Programming
ZIO Schedule: Conquering Flakiness & Recurrence with Pure Functional ProgrammingJohn De Goes
 
Angular 4 Data Binding | Two Way Data Binding in Angular 4 | Angular 4 Tutori...
Angular 4 Data Binding | Two Way Data Binding in Angular 4 | Angular 4 Tutori...Angular 4 Data Binding | Two Way Data Binding in Angular 4 | Angular 4 Tutori...
Angular 4 Data Binding | Two Way Data Binding in Angular 4 | Angular 4 Tutori...Edureka!
 
Introduction for Master Class "Amazing Reactive Forms"
Introduction for Master Class "Amazing Reactive Forms"Introduction for Master Class "Amazing Reactive Forms"
Introduction for Master Class "Amazing Reactive Forms"Fabio Biondi
 
Curso de Java: Introdução a lambda e Streams
Curso de Java: Introdução a lambda e StreamsCurso de Java: Introdução a lambda e Streams
Curso de Java: Introdução a lambda e StreamsHelder da Rocha
 
Basics of Model/View Qt programming
Basics of Model/View Qt programmingBasics of Model/View Qt programming
Basics of Model/View Qt programmingICS
 
Advanced java programming-contents
Advanced java programming-contentsAdvanced java programming-contents
Advanced java programming-contentsSelf-Employed
 
Webinar: Introducción a Angular
Webinar: Introducción a AngularWebinar: Introducción a Angular
Webinar: Introducción a AngularArsys
 
Abstract class and interface
Abstract class and interfaceAbstract class and interface
Abstract class and interfaceMazharul Sabbir
 

Tendances (20)

Java 8 Streams
Java 8 StreamsJava 8 Streams
Java 8 Streams
 
Implementing jsp tag extensions
Implementing jsp tag extensionsImplementing jsp tag extensions
Implementing jsp tag extensions
 
java memory management & gc
java memory management & gcjava memory management & gc
java memory management & gc
 
How to -- Goolge colab
How to -- Goolge colabHow to -- Goolge colab
How to -- Goolge colab
 
ZIO Schedule: Conquering Flakiness & Recurrence with Pure Functional Programming
ZIO Schedule: Conquering Flakiness & Recurrence with Pure Functional ProgrammingZIO Schedule: Conquering Flakiness & Recurrence with Pure Functional Programming
ZIO Schedule: Conquering Flakiness & Recurrence with Pure Functional Programming
 
Angular 4 Data Binding | Two Way Data Binding in Angular 4 | Angular 4 Tutori...
Angular 4 Data Binding | Two Way Data Binding in Angular 4 | Angular 4 Tutori...Angular 4 Data Binding | Two Way Data Binding in Angular 4 | Angular 4 Tutori...
Angular 4 Data Binding | Two Way Data Binding in Angular 4 | Angular 4 Tutori...
 
Threading in C#
Threading in C#Threading in C#
Threading in C#
 
Sequential Pattern Mining and GSP
Sequential Pattern Mining and GSPSequential Pattern Mining and GSP
Sequential Pattern Mining and GSP
 
Introduction for Master Class "Amazing Reactive Forms"
Introduction for Master Class "Amazing Reactive Forms"Introduction for Master Class "Amazing Reactive Forms"
Introduction for Master Class "Amazing Reactive Forms"
 
Monadic Java
Monadic JavaMonadic Java
Monadic Java
 
Curso de Java: Introdução a lambda e Streams
Curso de Java: Introdução a lambda e StreamsCurso de Java: Introdução a lambda e Streams
Curso de Java: Introdução a lambda e Streams
 
Basics of Model/View Qt programming
Basics of Model/View Qt programmingBasics of Model/View Qt programming
Basics of Model/View Qt programming
 
UNIT-2-PPTS-DAA.ppt
UNIT-2-PPTS-DAA.pptUNIT-2-PPTS-DAA.ppt
UNIT-2-PPTS-DAA.ppt
 
Advanced java programming-contents
Advanced java programming-contentsAdvanced java programming-contents
Advanced java programming-contents
 
Webinar: Introducción a Angular
Webinar: Introducción a AngularWebinar: Introducción a Angular
Webinar: Introducción a Angular
 
JDBC – Java Database Connectivity
JDBC – Java Database ConnectivityJDBC – Java Database Connectivity
JDBC – Java Database Connectivity
 
Abstract class and interface
Abstract class and interfaceAbstract class and interface
Abstract class and interface
 
Java 8 Lambda and Streams
Java 8 Lambda and StreamsJava 8 Lambda and Streams
Java 8 Lambda and Streams
 
Android Fragment
Android FragmentAndroid Fragment
Android Fragment
 
Java GC
Java GCJava GC
Java GC
 

Similaire à How to add an optimization for C# to RyuJIT

Egor Bogatov - .NET Core intrinsics and other micro-optimizations
Egor Bogatov - .NET Core intrinsics and other micro-optimizationsEgor Bogatov - .NET Core intrinsics and other micro-optimizations
Egor Bogatov - .NET Core intrinsics and other micro-optimizationsEgor Bogatov
 
PVS-Studio team experience: checking various open source projects, or mistake...
PVS-Studio team experience: checking various open source projects, or mistake...PVS-Studio team experience: checking various open source projects, or mistake...
PVS-Studio team experience: checking various open source projects, or mistake...Andrey Karpov
 
Halide tutorial 2019
Halide tutorial 2019Halide tutorial 2019
Halide tutorial 2019Champ Yen
 
Java Performance Puzzlers
Java Performance PuzzlersJava Performance Puzzlers
Java Performance PuzzlersDoug Hawkins
 
JVM Mechanics: When Does the JVM JIT & Deoptimize?
JVM Mechanics: When Does the JVM JIT & Deoptimize?JVM Mechanics: When Does the JVM JIT & Deoptimize?
JVM Mechanics: When Does the JVM JIT & Deoptimize?Doug Hawkins
 
C++ lectures all chapters in one slide.pptx
C++ lectures all chapters in one slide.pptxC++ lectures all chapters in one slide.pptx
C++ lectures all chapters in one slide.pptxssuser3cbb4c
 
ch04-conditional-execution.ppt
ch04-conditional-execution.pptch04-conditional-execution.ppt
ch04-conditional-execution.pptMahyuddin8
 
Pythran: Static compiler for high performance by Mehdi Amini PyData SV 2014
Pythran: Static compiler for high performance by Mehdi Amini PyData SV 2014Pythran: Static compiler for high performance by Mehdi Amini PyData SV 2014
Pythran: Static compiler for high performance by Mehdi Amini PyData SV 2014PyData
 
Whats new in_csharp4
Whats new in_csharp4Whats new in_csharp4
Whats new in_csharp4Abed Bukhari
 
Hypercritical C++ Code Review
Hypercritical C++ Code ReviewHypercritical C++ Code Review
Hypercritical C++ Code ReviewAndrey Karpov
 
Chainer-Compiler 動かしてみた
Chainer-Compiler 動かしてみたChainer-Compiler 動かしてみた
Chainer-Compiler 動かしてみたAkira Maruoka
 
شرح مقرر البرمجة 2 لغة جافا - الوحدة الثالثة
شرح مقرر البرمجة 2   لغة جافا - الوحدة الثالثةشرح مقرر البرمجة 2   لغة جافا - الوحدة الثالثة
شرح مقرر البرمجة 2 لغة جافا - الوحدة الثالثةجامعة القدس المفتوحة
 
Let’s talk about microbenchmarking
Let’s talk about microbenchmarkingLet’s talk about microbenchmarking
Let’s talk about microbenchmarkingAndrey Akinshin
 
"Quantum" performance effects
"Quantum" performance effects"Quantum" performance effects
"Quantum" performance effectsSergey Kuksenko
 
How Data Flow analysis works in a static code analyzer
How Data Flow analysis works in a static code analyzerHow Data Flow analysis works in a static code analyzer
How Data Flow analysis works in a static code analyzerAndrey Karpov
 
Introduction to NumPy for Machine Learning Programmers
Introduction to NumPy for Machine Learning ProgrammersIntroduction to NumPy for Machine Learning Programmers
Introduction to NumPy for Machine Learning ProgrammersKimikazu Kato
 
Operator overloading2
Operator overloading2Operator overloading2
Operator overloading2zindadili
 

Similaire à How to add an optimization for C# to RyuJIT (20)

Egor Bogatov - .NET Core intrinsics and other micro-optimizations
Egor Bogatov - .NET Core intrinsics and other micro-optimizationsEgor Bogatov - .NET Core intrinsics and other micro-optimizations
Egor Bogatov - .NET Core intrinsics and other micro-optimizations
 
PVS-Studio team experience: checking various open source projects, or mistake...
PVS-Studio team experience: checking various open source projects, or mistake...PVS-Studio team experience: checking various open source projects, or mistake...
PVS-Studio team experience: checking various open source projects, or mistake...
 
Halide tutorial 2019
Halide tutorial 2019Halide tutorial 2019
Halide tutorial 2019
 
Java Performance Puzzlers
Java Performance PuzzlersJava Performance Puzzlers
Java Performance Puzzlers
 
JVM Mechanics: When Does the JVM JIT & Deoptimize?
JVM Mechanics: When Does the JVM JIT & Deoptimize?JVM Mechanics: When Does the JVM JIT & Deoptimize?
JVM Mechanics: When Does the JVM JIT & Deoptimize?
 
C++ lectures all chapters in one slide.pptx
C++ lectures all chapters in one slide.pptxC++ lectures all chapters in one slide.pptx
C++ lectures all chapters in one slide.pptx
 
White box-sol
White box-solWhite box-sol
White box-sol
 
ch04-conditional-execution.ppt
ch04-conditional-execution.pptch04-conditional-execution.ppt
ch04-conditional-execution.ppt
 
Pythran: Static compiler for high performance by Mehdi Amini PyData SV 2014
Pythran: Static compiler for high performance by Mehdi Amini PyData SV 2014Pythran: Static compiler for high performance by Mehdi Amini PyData SV 2014
Pythran: Static compiler for high performance by Mehdi Amini PyData SV 2014
 
Whats new in_csharp4
Whats new in_csharp4Whats new in_csharp4
Whats new in_csharp4
 
Exploiting vectorization with ISPC
Exploiting vectorization with ISPCExploiting vectorization with ISPC
Exploiting vectorization with ISPC
 
Hypercritical C++ Code Review
Hypercritical C++ Code ReviewHypercritical C++ Code Review
Hypercritical C++ Code Review
 
Chainer-Compiler 動かしてみた
Chainer-Compiler 動かしてみたChainer-Compiler 動かしてみた
Chainer-Compiler 動かしてみた
 
شرح مقرر البرمجة 2 لغة جافا - الوحدة الثالثة
شرح مقرر البرمجة 2   لغة جافا - الوحدة الثالثةشرح مقرر البرمجة 2   لغة جافا - الوحدة الثالثة
شرح مقرر البرمجة 2 لغة جافا - الوحدة الثالثة
 
Let’s talk about microbenchmarking
Let’s talk about microbenchmarkingLet’s talk about microbenchmarking
Let’s talk about microbenchmarking
 
"Quantum" performance effects
"Quantum" performance effects"Quantum" performance effects
"Quantum" performance effects
 
How Data Flow analysis works in a static code analyzer
How Data Flow analysis works in a static code analyzerHow Data Flow analysis works in a static code analyzer
How Data Flow analysis works in a static code analyzer
 
Introduction to NumPy for Machine Learning Programmers
Introduction to NumPy for Machine Learning ProgrammersIntroduction to NumPy for Machine Learning Programmers
Introduction to NumPy for Machine Learning Programmers
 
Operator overloading2
Operator overloading2Operator overloading2
Operator overloading2
 
10. Recursion
10. Recursion10. Recursion
10. Recursion
 

Dernier

%+27788225528 love spells in Boston Psychic Readings, Attraction spells,Bring...
%+27788225528 love spells in Boston Psychic Readings, Attraction spells,Bring...%+27788225528 love spells in Boston Psychic Readings, Attraction spells,Bring...
%+27788225528 love spells in Boston Psychic Readings, Attraction spells,Bring...masabamasaba
 
%in Rustenburg+277-882-255-28 abortion pills for sale in Rustenburg
%in Rustenburg+277-882-255-28 abortion pills for sale in Rustenburg%in Rustenburg+277-882-255-28 abortion pills for sale in Rustenburg
%in Rustenburg+277-882-255-28 abortion pills for sale in Rustenburgmasabamasaba
 
%in kaalfontein+277-882-255-28 abortion pills for sale in kaalfontein
%in kaalfontein+277-882-255-28 abortion pills for sale in kaalfontein%in kaalfontein+277-882-255-28 abortion pills for sale in kaalfontein
%in kaalfontein+277-882-255-28 abortion pills for sale in kaalfonteinmasabamasaba
 
WSO2CON 2024 - Does Open Source Still Matter?
WSO2CON 2024 - Does Open Source Still Matter?WSO2CON 2024 - Does Open Source Still Matter?
WSO2CON 2024 - Does Open Source Still Matter?WSO2
 
%in kempton park+277-882-255-28 abortion pills for sale in kempton park
%in kempton park+277-882-255-28 abortion pills for sale in kempton park %in kempton park+277-882-255-28 abortion pills for sale in kempton park
%in kempton park+277-882-255-28 abortion pills for sale in kempton park masabamasaba
 
VTU technical seminar 8Th Sem on Scikit-learn
VTU technical seminar 8Th Sem on Scikit-learnVTU technical seminar 8Th Sem on Scikit-learn
VTU technical seminar 8Th Sem on Scikit-learnAmarnathKambale
 
WSO2CON 2024 - Cloud Native Middleware: Domain-Driven Design, Cell-Based Arch...
WSO2CON 2024 - Cloud Native Middleware: Domain-Driven Design, Cell-Based Arch...WSO2CON 2024 - Cloud Native Middleware: Domain-Driven Design, Cell-Based Arch...
WSO2CON 2024 - Cloud Native Middleware: Domain-Driven Design, Cell-Based Arch...WSO2
 
%in tembisa+277-882-255-28 abortion pills for sale in tembisa
%in tembisa+277-882-255-28 abortion pills for sale in tembisa%in tembisa+277-882-255-28 abortion pills for sale in tembisa
%in tembisa+277-882-255-28 abortion pills for sale in tembisamasabamasaba
 
WSO2CON 2024 - WSO2's Digital Transformation Journey with Choreo: A Platforml...
WSO2CON 2024 - WSO2's Digital Transformation Journey with Choreo: A Platforml...WSO2CON 2024 - WSO2's Digital Transformation Journey with Choreo: A Platforml...
WSO2CON 2024 - WSO2's Digital Transformation Journey with Choreo: A Platforml...WSO2
 
%+27788225528 love spells in Knoxville Psychic Readings, Attraction spells,Br...
%+27788225528 love spells in Knoxville Psychic Readings, Attraction spells,Br...%+27788225528 love spells in Knoxville Psychic Readings, Attraction spells,Br...
%+27788225528 love spells in Knoxville Psychic Readings, Attraction spells,Br...masabamasaba
 
%+27788225528 love spells in Toronto Psychic Readings, Attraction spells,Brin...
%+27788225528 love spells in Toronto Psychic Readings, Attraction spells,Brin...%+27788225528 love spells in Toronto Psychic Readings, Attraction spells,Brin...
%+27788225528 love spells in Toronto Psychic Readings, Attraction spells,Brin...masabamasaba
 
%in Hazyview+277-882-255-28 abortion pills for sale in Hazyview
%in Hazyview+277-882-255-28 abortion pills for sale in Hazyview%in Hazyview+277-882-255-28 abortion pills for sale in Hazyview
%in Hazyview+277-882-255-28 abortion pills for sale in Hazyviewmasabamasaba
 
8257 interfacing 2 in microprocessor for btech students
8257 interfacing 2 in microprocessor for btech students8257 interfacing 2 in microprocessor for btech students
8257 interfacing 2 in microprocessor for btech studentsHimanshiGarg82
 
The title is not connected to what is inside
The title is not connected to what is insideThe title is not connected to what is inside
The title is not connected to what is insideshinachiaurasa2
 
W01_panagenda_Navigating-the-Future-with-The-Hitchhikers-Guide-to-Notes-and-D...
W01_panagenda_Navigating-the-Future-with-The-Hitchhikers-Guide-to-Notes-and-D...W01_panagenda_Navigating-the-Future-with-The-Hitchhikers-Guide-to-Notes-and-D...
W01_panagenda_Navigating-the-Future-with-The-Hitchhikers-Guide-to-Notes-and-D...panagenda
 
%+27788225528 love spells in Colorado Springs Psychic Readings, Attraction sp...
%+27788225528 love spells in Colorado Springs Psychic Readings, Attraction sp...%+27788225528 love spells in Colorado Springs Psychic Readings, Attraction sp...
%+27788225528 love spells in Colorado Springs Psychic Readings, Attraction sp...masabamasaba
 
Devoxx UK 2024 - Going serverless with Quarkus, GraalVM native images and AWS...
Devoxx UK 2024 - Going serverless with Quarkus, GraalVM native images and AWS...Devoxx UK 2024 - Going serverless with Quarkus, GraalVM native images and AWS...
Devoxx UK 2024 - Going serverless with Quarkus, GraalVM native images and AWS...Bert Jan Schrijver
 
What Goes Wrong with Language Definitions and How to Improve the Situation
What Goes Wrong with Language Definitions and How to Improve the SituationWhat Goes Wrong with Language Definitions and How to Improve the Situation
What Goes Wrong with Language Definitions and How to Improve the SituationJuha-Pekka Tolvanen
 
%in Stilfontein+277-882-255-28 abortion pills for sale in Stilfontein
%in Stilfontein+277-882-255-28 abortion pills for sale in Stilfontein%in Stilfontein+277-882-255-28 abortion pills for sale in Stilfontein
%in Stilfontein+277-882-255-28 abortion pills for sale in Stilfonteinmasabamasaba
 
WSO2Con2024 - From Code To Cloud: Fast Track Your Cloud Native Journey with C...
WSO2Con2024 - From Code To Cloud: Fast Track Your Cloud Native Journey with C...WSO2Con2024 - From Code To Cloud: Fast Track Your Cloud Native Journey with C...
WSO2Con2024 - From Code To Cloud: Fast Track Your Cloud Native Journey with C...WSO2
 

Dernier (20)

%+27788225528 love spells in Boston Psychic Readings, Attraction spells,Bring...
%+27788225528 love spells in Boston Psychic Readings, Attraction spells,Bring...%+27788225528 love spells in Boston Psychic Readings, Attraction spells,Bring...
%+27788225528 love spells in Boston Psychic Readings, Attraction spells,Bring...
 
%in Rustenburg+277-882-255-28 abortion pills for sale in Rustenburg
%in Rustenburg+277-882-255-28 abortion pills for sale in Rustenburg%in Rustenburg+277-882-255-28 abortion pills for sale in Rustenburg
%in Rustenburg+277-882-255-28 abortion pills for sale in Rustenburg
 
%in kaalfontein+277-882-255-28 abortion pills for sale in kaalfontein
%in kaalfontein+277-882-255-28 abortion pills for sale in kaalfontein%in kaalfontein+277-882-255-28 abortion pills for sale in kaalfontein
%in kaalfontein+277-882-255-28 abortion pills for sale in kaalfontein
 
WSO2CON 2024 - Does Open Source Still Matter?
WSO2CON 2024 - Does Open Source Still Matter?WSO2CON 2024 - Does Open Source Still Matter?
WSO2CON 2024 - Does Open Source Still Matter?
 
%in kempton park+277-882-255-28 abortion pills for sale in kempton park
%in kempton park+277-882-255-28 abortion pills for sale in kempton park %in kempton park+277-882-255-28 abortion pills for sale in kempton park
%in kempton park+277-882-255-28 abortion pills for sale in kempton park
 
VTU technical seminar 8Th Sem on Scikit-learn
VTU technical seminar 8Th Sem on Scikit-learnVTU technical seminar 8Th Sem on Scikit-learn
VTU technical seminar 8Th Sem on Scikit-learn
 
WSO2CON 2024 - Cloud Native Middleware: Domain-Driven Design, Cell-Based Arch...
WSO2CON 2024 - Cloud Native Middleware: Domain-Driven Design, Cell-Based Arch...WSO2CON 2024 - Cloud Native Middleware: Domain-Driven Design, Cell-Based Arch...
WSO2CON 2024 - Cloud Native Middleware: Domain-Driven Design, Cell-Based Arch...
 
%in tembisa+277-882-255-28 abortion pills for sale in tembisa
%in tembisa+277-882-255-28 abortion pills for sale in tembisa%in tembisa+277-882-255-28 abortion pills for sale in tembisa
%in tembisa+277-882-255-28 abortion pills for sale in tembisa
 
WSO2CON 2024 - WSO2's Digital Transformation Journey with Choreo: A Platforml...
WSO2CON 2024 - WSO2's Digital Transformation Journey with Choreo: A Platforml...WSO2CON 2024 - WSO2's Digital Transformation Journey with Choreo: A Platforml...
WSO2CON 2024 - WSO2's Digital Transformation Journey with Choreo: A Platforml...
 
%+27788225528 love spells in Knoxville Psychic Readings, Attraction spells,Br...
%+27788225528 love spells in Knoxville Psychic Readings, Attraction spells,Br...%+27788225528 love spells in Knoxville Psychic Readings, Attraction spells,Br...
%+27788225528 love spells in Knoxville Psychic Readings, Attraction spells,Br...
 
%+27788225528 love spells in Toronto Psychic Readings, Attraction spells,Brin...
%+27788225528 love spells in Toronto Psychic Readings, Attraction spells,Brin...%+27788225528 love spells in Toronto Psychic Readings, Attraction spells,Brin...
%+27788225528 love spells in Toronto Psychic Readings, Attraction spells,Brin...
 
%in Hazyview+277-882-255-28 abortion pills for sale in Hazyview
%in Hazyview+277-882-255-28 abortion pills for sale in Hazyview%in Hazyview+277-882-255-28 abortion pills for sale in Hazyview
%in Hazyview+277-882-255-28 abortion pills for sale in Hazyview
 
8257 interfacing 2 in microprocessor for btech students
8257 interfacing 2 in microprocessor for btech students8257 interfacing 2 in microprocessor for btech students
8257 interfacing 2 in microprocessor for btech students
 
The title is not connected to what is inside
The title is not connected to what is insideThe title is not connected to what is inside
The title is not connected to what is inside
 
W01_panagenda_Navigating-the-Future-with-The-Hitchhikers-Guide-to-Notes-and-D...
W01_panagenda_Navigating-the-Future-with-The-Hitchhikers-Guide-to-Notes-and-D...W01_panagenda_Navigating-the-Future-with-The-Hitchhikers-Guide-to-Notes-and-D...
W01_panagenda_Navigating-the-Future-with-The-Hitchhikers-Guide-to-Notes-and-D...
 
%+27788225528 love spells in Colorado Springs Psychic Readings, Attraction sp...
%+27788225528 love spells in Colorado Springs Psychic Readings, Attraction sp...%+27788225528 love spells in Colorado Springs Psychic Readings, Attraction sp...
%+27788225528 love spells in Colorado Springs Psychic Readings, Attraction sp...
 
Devoxx UK 2024 - Going serverless with Quarkus, GraalVM native images and AWS...
Devoxx UK 2024 - Going serverless with Quarkus, GraalVM native images and AWS...Devoxx UK 2024 - Going serverless with Quarkus, GraalVM native images and AWS...
Devoxx UK 2024 - Going serverless with Quarkus, GraalVM native images and AWS...
 
What Goes Wrong with Language Definitions and How to Improve the Situation
What Goes Wrong with Language Definitions and How to Improve the SituationWhat Goes Wrong with Language Definitions and How to Improve the Situation
What Goes Wrong with Language Definitions and How to Improve the Situation
 
%in Stilfontein+277-882-255-28 abortion pills for sale in Stilfontein
%in Stilfontein+277-882-255-28 abortion pills for sale in Stilfontein%in Stilfontein+277-882-255-28 abortion pills for sale in Stilfontein
%in Stilfontein+277-882-255-28 abortion pills for sale in Stilfontein
 
WSO2Con2024 - From Code To Cloud: Fast Track Your Cloud Native Journey with C...
WSO2Con2024 - From Code To Cloud: Fast Track Your Cloud Native Journey with C...WSO2Con2024 - From Code To Cloud: Fast Track Your Cloud Native Journey with C...
WSO2Con2024 - From Code To Cloud: Fast Track Your Cloud Native Journey with C...
 

How to add an optimization for C# to RyuJIT

  • 1. How to add an optimization for C# to JIT compiler @EgorBo Engineer at Microsoft
  • 2. RyuJIT From C# to machine code 2 C# IL IR ASM
  • 3. float y = x / 2; float y = x * 0.5f; Let’s start with a simple optimization 3 vdivss (Latency: 20, R.Throughput: 14) vmulss (Latency: 5, R.Throughput: 0.5) ^ for my MacBook (Haswell)
  • 4. X / C 4 double y = x / 2; float y = x / 2; double y = x / 10; double y = x / 8; double y = x / -0.5; float x = 48.665f; Console.WriteLine(x / 10f); // 4.8665 Console.WriteLine(x * 0.1f); // 4.8665004 = x * 0.5 = x * 0.5f = x * 0.1 = x * 0.125 = x * -2
  • 5. X / C – let me optimize it in Roslyn! 5 static float GetSpeed(float distance, float time) { return distance / time; } ... float speed = GetSpeed(distance, 2); Does Roslyn see “X/C” here? NO! It doesn’t inline methods
  • 6. Where to implement my custom optimization? 6 • Roslyn + No time constraints + It’s written in C# - easy to add optimizations, easy to debug and experiment - No cross-assembly optimizations - No CPU-dependent optimizations (IL is cross-platform) - Doesn’t know how the code will look like after inlining, CSE, loop optimizations, etc. - F# doesn’t use Roslyn • JIT + Inlining, CSE, Loop opts, etc phases create more opportunities for optimizations + Knows everything about target platform, CPU capabilities - Written in C++, difficult to experiment - Time constraints for optimizations (probably not that important with Tiering) • R2R (AOT) + No time constraints (some optimizations are really time consuming, e.g. full escape analysis) - No CPU-dependent optimizations - Will be most likely re-jitted anyway? • ILLink Custom Step + Cross-assembly IL optimizations + Written in C# + We can manually de-virtualize types/methods/calls (if we know what we are doing) - Still no inlining, CSE, etc..
  • 8. RyuJIT: where to morph my X/C? 8
  • 10. Dump IR via COMPlus_JitDump
  • 11. Back to X/C: Morph IR Tree static float Calculate(float distance, float v0) { return distance / 2 + v0; } ADD DIV LCL_VAR (v0) CNS_DBL (2.0) LCL_VAR (distance) ADD MUL LCL_VAR (v0) CNS_DBL (0.5) LCL_VAR (distance) Morph
  • 12. Implementing the opt in morph.cpp DIV op2op1
  • 13. Inspired by GT_ROL 13 /// <summary> /// Rotates the specified value left by the specified number of bits. /// </summary> public static uint RotateLeft(uint value, int offset) => (value << offset) | (value >> (32 - offset)); OR ROL / / / / LSH RSZ => x y / / x AND x AND / / y 31 ADD 31 / NEG 32 | y rol eax, cl
  • 14. 15 "Hello".Length -> 5 static void Append(string str) { if (str.Length <= 2) QuickAppend(str); else SlowAppend(str); } ... builder.Append("/>"); builder.QuickAppend("/>"); Inline, remove if (2 <= 2)
  • 15. 16 "Hello".Length => 5 ARR_LENGTH CNS_STR (ScpHnd, SconCPX) CNS_INT (5) VM case GT_ARR_LENGTH: { if (op1->OperIs(GT_CNS_STR)) { GenTreeStrCon* strCon = op1->AsStrCon(); int len = info.compCompHnd->getStringLength( strCon->gtScpHnd, strCon->gtSconCPX); return gtNewIconNode(len); } break; } JIT <-> VM Interface op1 Access VM’s data from JIT
  • 16. 17 bool Test1(int x) => x % 4 == 0; bool Test1(int x) => (x & 3) == 0; MOD EQ/NE CNS_INT (0) CNS_INT (4) anything op1 op2 op2op1 AND EQ/NE CNS_INT (0) CNS_INT (3) anything op1 op2 op2op1
  • 17. Roslyn and “!=“ operator (unexpected optimization) 18 public static bool Test1(int x) => x != 42; public static bool Test2(int x) => x != 0; IL_0000: ldarg.0 IL_0001: ldc.i4.42 IL_0002: ceq IL_0004: ldc.i4.0 IL_0005: ceq IL_0007: ret IL_0000: ldarg.0 IL_0001: ldc.i4.0 IL_0002: cgt.un IL_0004: ret return (uint)x > 0 return (x == 42) == false JIT: ok, it’s GT_NE JIT: what?.. ok, GT_GT
  • 18. 19 Math.Pow(x, 2) Math.Pow(x, 1) Math.Pow(x, 4) Math.Pow(x, -1) // can be added: Math.Pow(42, 3) Math.Pow(1, x) Math.Pow(2, x) Math.Pow(x, 0) Math.Pow(x, 0.5) | x * x | x | x * x * x * x | 1 / x | 74088 | 1 | exp2(x) | 1 | sqrt(x)
  • 20. Range check elimination 21 public static void Test(int[] a) { a[0] = 4; a[1] = 2; for (int i = 0; i < a.Length; i++) { a[i] = 0; } a[1] = 2; }
  • 21. Range check elimination 22 public static void Test(int[] a) { if (a.Length <= 0) throw new IndexOutOfRangeException(); a[0] = 4; if (a.Length <= 1) throw new IndexOutOfRangeException(); a[1] = 2; for (int i = 0; i < a.Length; i++) { if (a.Length <= i) throw new IndexOutOfRangeException(); a[i] = 0; } if (a.Length <= 1) throw new IndexOutOfRangeException(); a[1] = 2; }
  • 22. Range check elimination 23 public static void Test(int[] a) { if (a.Length <= 0) throw new IndexOutOfRangeException(); a[0] = 4; if (a.Length <= 1) throw new IndexOutOfRangeException(); a[1] = 2; for (int i = 0; i < a.Length; i++) { if (a.Length <= i) throw new IndexOutOfRangeException(); a[i] = 0; } if (a.Length <= 1) throw new IndexOutOfRangeException(); a[1] = 2; }
  • 23. Range check elimination 24 public static void Test(int[] a) { if (a.Length <= 1) throw new IndexOutOfRangeException(); a[1] = 2; if (a.Length <= 1) throw new IndexOutOfRangeException(); a[0] = 4; for (int i = 0; i < a.Length; i++) { if (a.Length <= i) throw new IndexOutOfRangeException(); a[i] = 0; } if (a.Length <= 1) throw new IndexOutOfRangeException(); a[1] = 2; }
  • 24. Range check elimination 25 public static void Test(int[] a) { if (a.Length <= 1) throw new IndexOutOfRangeException(); a[1] = 2; a[0] = 4; for (int i = 0; i < a.Length; i++) { a[i] = 0; } a[1] = 2; }
  • 25. rangecheck.cpp (simplified) 26 void RangeCheck::OptimizeRangeCheck(GenTreeBoundsChk* bndsChk) { // Get the range for this index. Range range = GetRange(...); // If upper or lower limit is unknown, then return. if (range.UpperLimit().IsUnknown() || range.LowerLimit().IsUnknown()) { return; } // Is the range between the lower and upper bound values. if (BetweenBounds(range, 0, bndsChk->gtArrLen)) { m_pCompiler->optRemoveRangeCheck(treeParent, stmt); } return; }
  • 26. rangecheck.cpp (simplified) 27 void RangeCheck::OptimizeRangeCheck(GenTreeBoundsChk* bndsChk) { // Get the range for this index. Range range = GetRange(...); // If upper or lower limit is unknown, then return. if (range.UpperLimit().IsUnknown() || range.LowerLimit().IsUnknown()) { return; } // Is the range between the lower and upper bound values. if (BetweenBounds(range, 0, bndsChk->gtArrLen)) { m_pCompiler->optRemoveRangeCheck(treeParent, stmt); } return; }
  • 27. rangecheck.cpp (simplified) 28 void RangeCheck::OptimizeRangeCheck(GenTreeBoundsChk* bndsChk) { // Get the range for this index. Range range = GetRange(...); // If upper or lower limit is unknown, then return. if (range.UpperLimit().IsUnknown() || range.LowerLimit().IsUnknown()) { return; } // Is the range between the lower and upper bound values. if (BetweenBounds(range, 0, bndsChk->gtArrLen)) { m_pCompiler->optRemoveRangeCheck(treeParent, stmt); } return; }
  • 28. Byte array 29 private static readonly byte[] _data = new byte[256] { 1, 2, 3, … }; public static byte GetByte(int i) { return _data[i]; }
  • 29. Byte array: Roslyn hack (new feature) 30 private static ReadOnlySpan<byte> _data => new byte[256] { 1, 2, 3, … }; public static byte GetByte(int i) { return _data[i]; } 256
  • 30. Byte array: byte index (my PR) 31 private static ReadOnlySpan<byte> _data => new byte[256] { 1, 2, 3, … }; public static byte GetByte(int i) { return _data[(byte)i]; } Byte indexer will never go out of bounds!
  • 31. rangecheck.cpp (simplified) 32 void RangeCheck::OptimizeRangeCheck(GenTreeBoundsChk* bndsChk) { // Get the range for this index. Range range = GetRange(...); // If upper or lower limit is unknown, then return. if (range.UpperLimit().IsUnknown() || range.LowerLimit().IsUnknown()) { return; } // Is the range between the lower and upper bound values. if (BetweenBounds(range, 0, bndsChk->gtArrLen)) { m_pCompiler->optRemoveRangeCheck(treeParent, stmt); } return; } [Byte.MinValue ... Byte.MaxValue] ArrLen = 256
  • 33. Loop Invariant Code Hoisting 34 public static bool Test(int[] a, int c) { for (int i = 0; i < a.Length; i++) { if (a[i] == c + 44) return false; } return true; }
  • 34. Loop Invariant Code Hoisting 35 public static bool Test(int[] a, int c) { int tmp = c + 44; for (int i = 0; i < a.Length; i++) { if (a[i] == tmp) return false; } return true; }
  • 35. NYI: Loop-unrolling 36 public static int Test(int[] a) { int sum = 0; for (int i = 0; i < a.Length; i++) { sum += a[i]; } return sum; }
  • 36. NYI: Loop-unrolling 37 public static int Test(int[] a) { int sum = 0; for (int i = 0; i < a.Length - 3; i += 4) { sum += a[i]; sum += a[i+1]; sum += a[i+2]; sum += a[i+3]; } return sum; }
  • 37. NYI: Loop-unswitch 38 public static int Test(int[] a, bool condition) { int agr = 0; for (int i = 0; i < a.Length; i++) { if (condition) agr += a[i]; else agr *= a[i]; } return agr; }
  • 38. NYI: Loop-unswitch 39 public static int Test (int[] a, bool condition) { int agr = 0; if (condition) for (int i = 0; i < a.Length; i++) agr += a[i]; else for (int i = 0; i < a.Length; i++) agr *= a[i]; return agr; }
  • 39. NYI: Loop-deletion 40 public static void Test() { for (int i = 0; i < 10; i++) { } } ; Method Program:Test() G_M44187_IG01: G_M44187_IG02: xor eax, eax G_M44187_IG03: inc eax cmp eax, 10 jl SHORT G_M44187_IG03 G_M44187_IG04: ret ; Total bytes of code: 10
  • 40. NYI: Loop-deletion 41 public static void Test() { } ; Method Program:DeadLoop() ret ; Total bytes of code: 1
  • 41. NYI: InductiveRangeCheckElimination 42 public static void Zero1000Elements(int[] array) { for (int i = 0; i < 1000; i++) array[i] = 0; // bound checks will be inserted here }
  • 42. NYI: InductiveRangeCheckElimination 43 public static void Zero1000Elements(int[] array) { int limit = Math.Min(array.Length, 1000); for (int i = 0; i < limit; i++) array[i] = 0; // bound checks are not needed here! for (int i = limit; i < 1000; i++) array[i] = 0; // bound checks are needed here // so at least we could "zero" first `limit` elements without bound checks } NOTE: this LLVM optimization pass is not enabled by default in `opt –O2`. Contributed by Azul developers (LLVM for JVM)
  • 43. NYI: InductiveRangeCheckElimination 44 public static void Zero1000Elements(int[] array) { int limit = Math.Min(array.Length, 1000); for (int i = 0; i < limit - 3; i += 4) { array[i] = 0; array[i+1] = 0; array[i+2] = 0; array[i+3] = 0; } for (int i = limit; i < 1000; i++) array[i] = 0; // bound checks are needed here // so at least we could "zero" first `limit` elements without bound checks } Now we can even unroll the first loop!
  • 44. NYI: InductiveRangeCheckElimination 45 public static void Zero1000Elements(int[] array) { int limit = Math.Min(array.Length, 1000); memset(array, 0, limit); for (int i = limit; i < 1000; i++) array[i] = 0; // bound checks are needed here // so at least we could "zero" first `limit` elements without bound checks } Or just replace with memset call