SlideShare une entreprise Scribd logo
1  sur  25
Télécharger pour lire hors ligne
This work was performed under the auspices of the U.S. Department of Energy by Lawrence Livermore National Laboratory
under contract DE-AC52-07NA27344. Lawrence Livermore National Security, LLC
Enhancing Domain Specific Language Implementations
Through Ontology
WOLFHPC15: Fifth International Workshop on Domain-Specific Languages and High-Level
Frameworks for High Performance Computing
Chunhua	
  “Leo”	
  Liao,	
  	
  Pei-­‐Hung	
  Lin	
  ,	
  Daniel	
  J.	
  Quinlan,	
  	
  	
  
Yue	
  Zhao*	
  ,	
  and	
  Xipeng	
  Shen*	
  
Nov. 16, 2015
LLNL-PRES-679077
*
2	
  
§  Motivation
§  Methodology
—  Ontology-based knowledge base
—  Compiler/runtime interface
§  Evaluation
—  Ontology for Stencil computation
—  Compiler implementation
—  Preliminary results
§  Related work
§  Discussion
Outline
3	
  
§  DSLs: attractive to program HPC machines
—  High-­‐level	
  annota5ons	
  with	
  rich	
  seman5cs	
  to	
  efficiently	
  express	
  domain	
  algorithms	
  
—  Performance:	
  DSL	
  implementa5ons	
  (compilers	
  +	
  run5me	
  systems)	
  
§  Semantic gap: Large body of knowledge is required to build efficient implementations
—  Applica5on	
  domain	
  op5miza5on	
  knowledge	
  
—  Language	
  seman5cs:	
  DSLs	
  and	
  	
  host	
  languages	
  
—  Library	
  interface	
  seman5cs	
  
—  Hardware	
  architecture	
  features	
  	
  
§  Current	
  problem:	
  	
  all	
  the	
  knowledge	
  is	
  implicitly	
  assumed	
  or	
  represented	
  using	
  ad-­‐hoc	
  
approaches	
  
—  informal,	
  	
  not	
  reusable,	
  	
  not	
  scalable	
  
§  Our solution: adapt modern knowledge-engineering methods to enhance DSL
implementations
Mo5va5on	
  
4	
  
§  Applica5on	
  domains	
  
—  Data:	
  grid,	
  points,	
  stencil,	
  halo,	
  …	
  
—  Algorithms:	
  Itera5ve	
  algorithm,	
  convergence	
  	
  
§  Language	
  seman5cs:	
  general-­‐purpose	
  languages	
  and	
  DSLs	
  
—  non-­‐aliasing,	
  non-­‐overlapping,	
  fixed-­‐sized	
  vs	
  resizable	
  
—  func5ons	
  :	
  read/write	
  variable	
  sets,	
  pure,	
  virtual	
  	
  
§  Library	
  knowledge:	
  standard	
  and	
  domain-­‐specific	
  
—  	
  Containers:	
  	
  unique,	
  ordered,	
  sorted,	
  con5nuous	
  storage	
  
—  Iterators:	
  random	
  access,	
  bidirec5onal	
  
§  HPC	
  hardware	
  architecture	
  configura5ons	
  
—  CPUs:	
  number	
  of	
  cores,	
  cache	
  sizes	
  	
  
—  NVIDIA	
  GPU:	
  	
  shared	
  memory,	
  	
  global	
  memory,	
  constant	
  
memory	
  
What	
  knowledge?	
  
Application
Domain
Hardware
Languages
(DSL, GPLs)
Libraries
Runtimes
Operating
System
HPC software/hardware stack
5	
  
DSL knowledge is often hard to extract, critical to performance
§  High-­‐level	
  abstrac5ons	
  in	
  DSL	
  
—  Encouraging code reuse and hiding implementation from interfaces to reduce software
complexity
—  Func5ons,	
  data	
  structures,	
  classes	
  and	
  templates	
  …	
  
—  Could	
  be	
  standard	
  or	
  user-­‐defined/domain-­‐specific	
  
§  Seman5cs	
  of	
  high-­‐level	
  abstrac5ons	
  
—  Cri5cal	
  to	
  op5miza5ons,	
  including	
  paralleliza5on	
  
•  Read-­‐only	
  seman5cs	
  
—  Hard	
  to	
  be	
  extracted	
  by	
  sta5c	
  analysis	
  	
  
•  STL vector implementation ?→? elements are contiguous in memory
§  Conven5onal	
  compilers	
  lose	
  track	
  of	
  abstrac5ons	
  
—  Analyses	
  and	
  op5miza5ons	
  are	
  mostly	
  done	
  on	
  top	
  of	
  middle	
  or	
  low	
  level	
  IR	
  
—  Hard	
  to	
  trace	
  back	
  to	
  the	
  high-­‐level	
  abstrac5ons	
  represented	
  in	
  source	
  level	
  
6	
  
§  Informal	
  and	
  Ad-­‐Hoc	
  	
  
§  Isolated: software and hardware knowledge
are managed separately
§  Hard to reuse, not scalable
§  No	
  toolchain	
  support:	
  	
  
—  Parser:	
  	
  each	
  solu5on	
  has	
  its	
  own	
  parser	
  
—  Lack	
  of	
  IDE,	
  valida5on	
  tools,	
  etc.	
  	
  
§  Not	
  easy	
  to	
  share	
  among	
  DSL	
  designers,	
  	
  
domain	
  experts	
  and	
  DSL	
  developers	
  
Problems	
  with	
  current	
  HPC	
  knowledge	
  management	
  approaches	
  	
  
class std::vector<T> {
alias none; overlap none; //elements are alias-free and non-overlapping
is_fixed_sized_array { //semantic-preserving functions as a fixed-sized array
length(i) = {this.size()};
element(i) = {this.operator[](i); this.at(i);};
};
};
void my_processing(SgNode* func_def) {
read{func_def}; modify {func_def}; //side effects of a function
}
std::list<SgFunctionDef*> findCFunctionDefinition(SgNode* root){
read {root}; modify {result};
return unique; //return a unique set
}
void Compass::OutputObject::addOutput(SgNode* node){
read {node};
//order-independent side effects
modify {Compass::OutputObject::outputList<order_independent>};
}
Semantic specification file Liao’10
Memory Specification Language: Chen’14
7	
  
A	
  new	
  ontology-­‐driven	
  DSL	
  implementa5on	
  paradigm	
  
Application Domains
Domain Specific Languages
General-Purpose
Languages
Libraries
Hardware Architectures
A Knowledge Base using
OWL
----------
[upper ontology]
...
[domain ontologies]
Relevant knowledge domains
...
Compiler
Runtime
DSL Implementations
Domain Experts
DSL Designers
DSL developers
Architects
...
End-user tools/
APIs
8	
  
§  Defini5ons:	
  	
  
—  Philosophy:	
  the	
  study	
  of	
  nature	
  of	
  beings	
  and	
  
their	
  rela5ons	
  
—  Modern:	
  a	
  formal	
  specifica5on	
  for	
  explicitly	
  
represen5ng	
  knowledge	
  of	
  the	
  en55es	
  in	
  a	
  
domain	
  
§  Provides	
  a	
  common	
  vocabulary	
  in	
  a	
  domain	
  
—  Concepts,	
  proper5es,	
  and	
  individuals	
  
§  Theory	
  founda5on:	
  descrip5on	
  logics	
  (DLs),	
  a	
  
family	
  of	
  logical	
  languages	
  for	
  knowledge	
  
representa5on	
  
—  several	
  dialects	
  with	
  different	
  expressiveness	
  
and	
  efficiencies	
  (for	
  reasoning)	
  	
  
Central	
  component:	
  ontology-­‐based	
  knowledge	
  base	
  
9	
  
§  Web Ontology Language (OWL): one of the most popular ontology languages
—  Classes	
  (or	
  concepts):	
  denote	
  sets	
  of	
  individuals.	
  	
  organized	
  in	
  a	
  hierarchy	
  
—  Individuals	
  (or	
  instances):	
  single	
  instances	
  in	
  the	
  domain	
  
—  Proper5es	
  (or	
  rela5ons):	
  	
  binary	
  rela5ons	
  between	
  en55es	
  
—  Maturing	
  tool	
  chain	
  since	
  ’04	
  by	
  W3C:	
  Protégé	
  IDE,	
  OWL	
  API,	
  FaCT++,	
  SWI-­‐Prolog,	
  	
  etc.	
  
Web	
  Ontology	
  Language	
  
10	
  
Example	
  family	
  ontology	
  using	
  OWL	
  
Person
Man Woman
John Mary
instanceOf
subClassOf subClassOf
instanceOf
hasWife
hasHusband
Prefix(:=<http://example.com/owl/families/>)
Ontology(<http://example.com/owl/families>
Declaration( NamedIndividual( :John ) )
Declaration( NamedIndividual( :Mary ) )
Declaration( Class( :Person ) )
Declaration( Class( :Woman ) )
Declaration( ObjectProperty( :hasWife ) )
Declaration( ObjectProperty( :hasSpouse) )
SubClassOf( :Woman :Person )
SubClassOf( :Man :Person )
ClassAssertion(:Man :John)
ClassAssertion(:Women :Mary)
SubObjectPropertyOf( :hasWife :hasSpouse )
ObjectPropertyAssertion( :hasWife :John :Mary )
... )
Family ontology in functional syntax
11	
  
Software and hardware ontology for DSL implementations
Hardware
Cplusplus
Program
owl:Thing
Processor
Memory
Type
Statement
Expression
CPU
GPU
Variable
....
MainMemory
Cache
Xeon
Kepler
....
IterationStatement
AssignmentStatement
DeclarationStatement
....
Library
§  Requirements:	
  
—  common	
  vocabulary	
  
—  intui5ve,	
  efficient	
  to	
  facilitate	
  DSL	
  analyses	
  
and	
  op5miza5ons	
  
§  So-ware:	
  	
  based	
  on	
  C	
  and	
  C++	
  language	
  
standards	
  
—  Individuals: use internationalized resource identifiers
(IRIs) – “http://example.com/owl/CProgram:Type”
•  Scoped	
  IRI	
  -­‐	
  qualified	
  name	
  +	
  rela5ve	
  loca5on	
  
•  E.g.:	
  file.c::foo()::1:6,1:10	
  	
  	
  	
  	
  std::vector<T>,	
  	
  	
  	
  
std::vector<int>,	
  	
  
§  Hardware:	
  	
  focus	
  on	
  single	
  machine	
  for	
  now	
  
—  Individuals:	
  	
  machine1,	
  cpu2,	
  memory	
  ,	
  iden5fied	
  by	
  
names	
  	
  or	
  serial	
  numbers	
  
§  Space	
  Efficiency	
  -­‐	
  core	
  knowledge	
  base	
  +	
  loadable	
  
supplemental	
  modules	
  (for	
  individuals)	
  
Properties:
aliasing, read, write, hasType,
hasScope, calledBy, …
Properties: sub class of, part of ,
frequency, has-a, …
12	
  
Excerpt of Software and Hardware Ontology with Individuals
% program concepts and their hierarchy
Class(:PointerType) SubClassOf(:PointerType :DerivedType)
Class(:BinaryOp) Class(:AddOp)
SubClassOf(:AddOp :BinaryOp)
SubClassOf(:SelectionStatement :Statement)
SubClassOf(:IfStatement :SelectionStatement)
% define the relations between program constructs
ObjectProperty(:hasScope) ObjectProperty(:hasType)
ObjectProperty( :alias) ObjectProperty(:read)
% variables identified by qualified names
NamedIndividual ( :var1) NamedIndividual ( :var2)
% two pointer variables alias each other
ObjectPropertyAssertion(:hasType :var1 :PointerType) …
ObjectPropertyAssertion (:alias :var1 :var2)
% hardware concepts and hierarchy
Class(:Xeon) Class(:CPU)
SubClassOf(:Xeon :CPU )
Class( :X5680 )
SubClassOf( :X5680 :Xeon )
ClassAssertion(:X5680 :cpu1)
Class(:GPU)
SubClassOf(:Quadro_4000 :GPU)
SubClassOf(:Quadro_4000 DataHasValue(:numberOfCores "256"))
% Individuals
NamedIndividual(:gpu1)
NamedIndividual(:cpu1)
ClassAssertion(:Quadro_4000 :gpu1)
% a workstation with CPU and GPU
NamedIndividual(:tux322)
ClassAssertion(:computer :tux322)
ObjectPropertyAssertion(:hasPart :tux322 :gpu1)
ObjectPropertyAssertion(:hasPart :tux322 :cpu2)
13	
  
§  Requirements	
  
—  Bidirec5onal:	
  	
  query	
  +	
  inser5on	
  
—  Support	
  both	
  on-­‐disk	
  and	
  in-­‐memory	
  storage	
  
§  Knowledge	
  representa5on:	
  OWL	
  
—  On	
  disk	
  as	
  OWL	
  files	
  	
  
—  In-­‐memory	
  storage:	
  	
  SWI-­‐Prolog	
  predicates	
  
§  Two	
  kinds	
  of	
  interfaces	
  
—  Prolog	
  reasoning	
  engine:	
  powerful	
  declara5ve	
  
Prolog	
  queries	
  opera5ng	
  on	
  in-­‐memory	
  storage	
  
•  Learning	
  curve	
  
—  Prebuilt	
  C/C++	
  query	
  interface:	
  	
  light	
  weight	
  
opera5ng	
  on	
  OWL	
  files,	
  easy	
  for	
  deployment	
  
Compiler	
  and	
  run5me	
  interface	
  of	
  ontology-­‐based	
  knowledge	
  
base	
  
% Prolog query interfaces
% Define additional predicates
checkClass(IndividualL, ClassL) :-
rdf(IndividualL, rdf:type, ClassL).
% recursively check through subclasses
checkClass(IndividualL, ClassL) :-
rdf(SubClassL, rdfs:subClassOf, ClassL),
checkClass(IndividualL, SubClassL).
% if individual tux322 is a computer?
?- checkClass (‘tux322’, ‘:computer’)
% What are the variables written by function bar?
?- Function(X),hasName(X,'bar'),writtenBy(Vars,X)
// C++ query interface
std::string getMachineName (string machine_IRI);
vector<std::string> getCPUs (string machine_name);
vector<std::string> getGPUs (string machine_name);
int getCoreCount (string gpu_IRI);
14	
  
Evaluation: benefits of ontology for DSL implementations
Ontology-based knowledge base for DSL implementations
ROSE
Frontend
Knowledge
Generator
DSL
Program
Ontology-Based Knowledge Base
(DSL, Program, Hardware)
AST
AutoPar PolyOpt
Protege
Domain experts
DSL designers
Programmers
Architects
C++
Program
ROSE
Unparser
DSL Lowering
SWI-Prolog Semantic
Web Library
ROSE Midend
§  The DSL: Shift Calculus
—  Light-­‐weight	
  embedded	
  DSL	
  
—  Host	
  language:	
  C++	
  
—  Leverage	
  Chombo	
  AMR	
  library	
  
[Colella’09]	
  
§  The implementation: a
ShiftCalculus source-to-source
translator
—  Built	
  on	
  top	
  of	
  ROSE,	
  a	
  source-­‐to-­‐
source	
  compiler	
  infrastructure	
  
developed	
  at	
  LLNL	
  
—  Connec5ng	
  to	
  ontology	
  
HOMP (CUDA
Generation)
OpenMP
Lowering
15	
  
ShiftCalculus DSL syntax and semantics
16	
  
ShiftCalculus DSL for stencil computation
#define DIM 3
int main(int argc, char* argv[])
{
Point lo, hi;
// Space discretization
Box bxdest(lo,hi);
Box bxsrc=bxdest.grow(1);
...
// Source and destination data containers
RectMDArray<double,1> Asrc(bxsrc);
RectMDArray<double,1> Adest(bxdest);
...
double ident, C0;
// Shift and Stencil declarations
array<Shift,DIM> shft_vec = getShiftVec();
Stencil<double> laplace = C0*(shft_vec^zero);
// Stencil formation using Shift
for (int dir=0;dir<DIM;dir++)
{
Point thishft = getUnitv(dir);
laplace = laplace + ident*(shft_vec^thishft);
laplace = laplace + ident*(shft_vec^(thishft*(-1)));
}
// Apply stencil computation using data containers in space
Stencil<double>::apply(laplace, Asrc, Adest, bxdest);
}
Constructing stencil at the initial point (red cell in the figure).
C0 is the coefficient applied to this stencil cell.
Iteratively constructing stencil for the surrounding points
(green cells in figure). “ident” is the coefficient applied to
these stencil cells.
Applying the stencil, source data, destination data, and
destination box to the computation. Destination box
represents the loops in the computation.
3-D 7-point stencil
17	
  
Generated	
  sequen5al	
  C++	
  output	
  for	
  Laplacian	
  example	
  
1 int main (int argc , char * argv [])
2 {
3 ...
4 const class Point lo( zero );
5 const class Point hi = getOnes () * adjustedBlockSize ;
6 const class Box bxdest (lo ,hi);
7 const class Box bxsrc = bxdest . grow (1) ;
8 class RectMDArray < double , 1 , 1 , 1 > Asrc ( bxsrc );
9 class RectMDArray < double , 1 , 1 , 1 > Adest ( bxdest );
10 const double ident = 1.0;
11 const double C0 = -6.00000;
12 ...
13 double * sourceDataPointer = Asrc.getPointer ();
14 double * destinationDataPointer = Adest.getPointer ();
15 for (k = lb2; k < ub2; ++k) {
16 for (j = lb1; j < ub1; ++j) {
17 for (i = lb0; i < ub0; ++i) {
18 destinationDataPointer [ arraySize_X * ( arraySize_Y * k + j) + i] =
sourceDataPointer [ arraySize_X * ( arraySize_Y * (k + -1) + j)+ i] +
sourceDataPointer [ arraySize_X * ( arraySize_Y * (k + 1)+ j) + i] +
sourceDataPointer [ arraySize_X * ( arraySize_Y * k+ (j + -1)) + i] +
sourceDataPointer [ arraySize_X * (arraySize_Y * k + (j + 1)) + i] +
sourceDataPointer [ arraySize_X * ( arraySize_Y * k + j) + (i + -1)] +
sourceDataPointer [ arraySize_X * ( arraySize_Y * k + j) + (i +1)] +
sourceDataPointer [ arraySize_X * ( arraySize_Y * k + j) +i] * -6.00000;
…
22 } } } }
More efficient code generation
( parallelization, vectorization, loop tiling,
GPU porting, etc.) need:
1.  Semantics of high level constructs
double * RectMDArray<>::getPointer()
2. Hardware details for target platforms
CPU, GPU, cache, memory size, etc.
18	
  
Connection between Ontology and DSL implementations
Shift Calculus DSL
Hardware
CplusplusProgram
ChomboLibrary
owl:Thing
Processor
Memory
Type
Statement
Expression
Stencil
Chombo::Box
CPU
GPU
RectMDArray
Variable
....
MainMemory
Cache
Xeon
Kepler
....
....
IterationStatement
AssignmentStatement
DeclarationStatement
Shift
Point
....
Coefficient Software semantics to
enable parallelization
HW specification
In code generation
ObjectProperty
•  alias, read, write, ….
DataProperty
•  Frequency, cache size
19	
  
§  Knowledge	
  genera5on:	
  traverse	
  AST	
  to	
  generate	
  a	
  program’s	
  concrete	
  individuals	
  
and	
  rela5on	
  instances	
  
—  Individual	
  language	
  constructs:	
  	
  	
  
•  func5ons,	
  loops,	
  types,	
  variable	
  references,	
  statements,	
  func5on	
  calls,	
  etc.	
  
—  Rela5ons:	
  	
  
•  Structural:	
  hasName,	
  hasParent,	
  hasScope,	
  calledBy,	
  returnedBy	
  ….	
  
•  Analysis:	
  alias,	
  	
  access	
  (read,	
  write),	
  overlap,	
  dependent	
  (true,	
  an5,	
  output)	
  ,	
  …	
  
§  Seman5cs	
  propaga5on	
  	
  
—  Across	
  different	
  levels	
  of	
  IRs:	
  transforma5on	
  tracking	
  
•  A	
  special	
  model:	
  	
  no	
  memory	
  reuse	
  for	
  AST	
  nodes	
  to	
  ensure	
  unique	
  IDs	
  
•  API	
  func5ons	
  to	
  store	
  input	
  nodes	
  for	
  a	
  generated	
  node:	
  	
  m:	
  n	
  mapping	
  
•  Update	
  knowledge	
  base	
  with	
  subClassOf(:low-­‐level-­‐en5ty	
  :high-­‐level-­‐en5ty)	
  to	
  connect	
  these	
  en55es	
  	
  
•  Queries	
  on	
  low-­‐level	
  en55es	
  can	
  return	
  seman5cs	
  associated	
  with	
  high-­‐level	
  en55es.	
  
—  Func5on	
  calls:	
  	
  	
  	
  a	
  =	
  foo(b):	
  	
  	
  	
  read/write	
  formal	
  	
  parameters	
  à	
  actual	
  parameters	
  
More	
  details	
  about	
  compiler	
  interac5ons	
  with	
  ontology	
  
20	
  
Output Variants Compiler/
Optimizations
Execution time (sec.)
C++ serial Baseline 1.51482
C++ OpenMP Parallel AutoPar 0.380562
C++ Polyhedral Tiled+parallel PolyOpt 0.503307
CUDA w/ data transfer HOMP 9.29446
CUDA w/o data transfer HOMP 3.14713e-05
Preliminary	
  results	
  
§  Configuration of testing platform:
—  24-core workstation with Intel Xeon CPU E5-2620 V.3 and 64 GB memory.
—  GCC	
  version	
  4.8.3,	
  NVCC	
  compiler	
  version	
  7.0.	
  
21	
  
§  Ontology-­‐based	
  knowledge	
  bases	
  
—  General	
  purpose:	
  Cyc[Matuszek’06],	
  SUMO[Pease’02]	
  
—  Domain-­‐specific:	
  	
  Gene	
  	
  ontology[Ashburner’00]	
  (Biology),	
  	
  KnowRob	
  [Tenorth’09](service	
  
robots),	
  Human	
  behaviors[Rodriguez’14]	
  
§  Knowledge	
  engineering	
  methodologies	
  applied	
  to	
  DSL	
  analysis	
  and	
  design	
  
—  Ontology-­‐based	
  domain	
  analysis:	
  [Tairas’09],	
  	
  
—  Transla5ng	
  Ontology	
  to	
  DSL	
  grammars:	
  	
  [Ceh’11]	
  
—  Domain-­‐specific	
  modeling	
  languages:	
  [Brauer’08],	
  [Walter’09]	
  
§  Our	
  work	
  :	
  first	
  to	
  use	
  ontology	
  to	
  enhance	
  DSL	
  implementa5ons	
  
Related	
  work	
  
22	
  
§  A	
  novel	
  ontology-­‐based	
  knowledge	
  base	
  to	
  enhance	
  DSL	
  implementa5ons	
  
—  Enable easy knowledge accumulation, reuse and sharing among software and human users
—  Formal,	
  explicit	
  and	
  uniform	
  format	
  using	
  OWL	
  
—  Ini5al	
  concepts	
  and	
  rela5ons	
  modeled	
  for	
  so-ware	
  and	
  hardware	
  domains	
  
—  Bidirec5onal	
  connec5on	
  to	
  compilers	
  
—  Prototype ShiftCalculus DSL implementation enabled by ontology
§  Ongoing/Future	
  work:	
  	
  	
  
—  Collec5ng	
  and	
  represen5ng	
  stencil	
  op5miza5on	
  knowledge	
  
—  Context-­‐aware	
  op5miza5on	
  advisors	
  
—  Declara5ve,	
  generic	
  program	
  analysis	
  and	
  transforma5on	
  
Discussion	
  
Questions and comments?
Thank	
  You!	
  
24	
  
Int main(){
…
{ int c0; int c3; int c4; int c5; int c1; int c2;
if (lb0 <= ub0 + -1 && lb1 <= ub1 + -1 && lb2 <= ub2 + -1) {
#pragma omp parallel for private(c2, c1, c5, c4, c3)
for (c0 = ((lb2 + -31) * 32 < 0?-(-(lb2 + -31) / 32) : ((32 < 0?(-(lb2 + -31) + - 32 - 1) / - 32 : (lb2 + -31 + 32 - 1) / 32))); c0 <= (((ub2 + -1) * 32 < 0?((32 < 0?-((-(ub2 +
-1) + 32 + 1) / 32) : -((-(ub2 + -1) + 32 - 1) / 32))) : (ub2 + -1) / 32)); c0++) {
for (c1 = ((lb1 + -31) * 32 < 0?-(-(lb1 + -31) / 32) : ((32 < 0?(-(lb1 + -31) + - 32 - 1) / - 32 : (lb1 + -31 + 32 - 1) / 32))); c1 <= (((ub1 + -1) * 32 < 0?((32 < 0?-((-(ub1 +
-1) + 32 + 1) / 32) : -((-(ub1 + -1) + 32 - 1) / 32))) : (ub1 + -1) / 32)); c1++) {
for (c2 = ((lb0 + -17) * 18 < 0?-(-(lb0 + -17) / 18) : ((18 < 0?(-(lb0 + -17) + - 18 - 1) / - 18 : (lb0 + -17 + 18 - 1) / 18))); c2 <= (((ub0 + -1) * 18 < 0?((18 < 0?-((-(ub0
+ -1) + 18 + 1) / 18) : -((-(ub0 + -1) + 18 - 1) / 18))) : (ub0 + -1) / 18)); c2++) {
for (c3 = (32 * c0 > lb2?32 * c0 : lb2); c3 <= ((32 * c0 + 31 < ub2 + -1?32 * c0 + 31 : ub2 + -1)); c3++) {
for (c4 = (32 * c1 > lb1?32 * c1 : lb1); c4 <= ((32 * c1 + 31 < ub1 + -1?32 * c1 + 31 : ub1 + -1)); c4++) {
#pragma ivdep
#pragma vector always
#pragma simd
for (c5 = (18 * c2 > lb0?18 * c2 : lb0); c5 <= ((18 * c2 + 17 < ub0 + -1?18 * c2 + 17 : ub0 + -1)); c5++) {/* tiled for 3 dims */
destinationDataPointer[c3][c4][c5] = sourceDataPointer[c3 + - 1][c4][c5] + sourceDataPointer[c3 + 1][c4][c5] + sourceDataPointer[c3][c4 + - 1][c5] +
sourceDataPointer[c3][c4 + 1][c5] + sourceDataPointer[c3][c4][c5 + - 1] + sourceDataPointer[c3][c4][c5 + 1] + sourceDataPointer[c3][c4][c5] * - 6.00000;
… } } } } } } } } }
Tiled	
  &	
  parallelized	
  (w/	
  OMP	
  and	
  SIMD)	
  C++	
  output	
  
25	
  
HOST code:
int main(int argc,char *argv[])
{…
{
double *_dev_sourceDataPointer;
int _dev_sourceDataPointer_size = sizeof(double ) * (1764 - 0);
_dev_sourceDataPointer = ((double *)(xomp_deviceMalloc(_dev_sourceDataPointer_size)));
xomp_memcpyHostToDevice(((void *)_dev_sourceDataPointer),((const void
*)sourceDataPointer),_dev_sourceDataPointer_size);
double *_dev_destinationDataPointer;
int _dev_destinationDataPointer_size = sizeof(double ) * (1764 - 0);
_dev_destinationDataPointer = ((double *)
(xomp_deviceMalloc(_dev_destinationDataPointer_size)));
int _threads_per_block_ = xomp_get_maxThreadsPerBlock();
int _num_blocks_ = xomp_get_max1DBlock(__final_total_iters__3__ - 1 - 0 + 1);
OUT__1__1527__<<<_num_blocks_,_threads_per_block_>>>(__final_total_iters__3__,__k_interval
__4__,__j_interval__5__,_dev_sourceDataPointer,_dev_destinationDataPointer);
xomp_freeDevice(_dev_sourceDataPointer);
xomp_memcpyDeviceToHost(((void *)destinationDataPointer),((const void
*)_dev_destinationDataPointer),_dev_destinationDataPointer_size);
xomp_freeDevice(_dev_destinationDataPointer);
}
}
Generated	
  CUDA	
  output	
  
Device	
  code:	
  
extern	
  "C"	
  __global__	
  void	
  OUT__1__1527__(int	
  __final_total_iters__3__,int	
  __k_interval__4__,int	
  
__j_interval__5__,double	
  *_dev_sourceDataPointer,double	
  *_dev_des5na5onDataPointer)	
  
{	
  
	
  	
  int	
  _p_k;	
  	
  	
  int	
  _p_j;	
  	
  	
  int	
  _p_i;	
  	
  	
  int	
  _p___collapsed_index__7__;	
  
	
  	
  int	
  _dev_lower;	
  	
  	
  int	
  _dev_upper;	
  
	
  	
  int	
  _dev_loop_chunk_size;	
  	
  	
  int	
  _dev_loop_sched_index;	
  	
  int	
  _dev_loop_stride;	
  
	
  	
  int	
  _dev_thread_num	
  =	
  getCUDABlockThreadCount(1);	
  
	
  	
  int	
  _dev_thread_id	
  =	
  getLoopIndexFromCUDAVariables(1);	
  
	
  	
  XOMP_sta5c_sched_init(0,__final_total_iters__3__	
  -­‐	
  
1,1,1,_dev_thread_num,_dev_thread_id,&_dev_loop_chunk_size,&_dev_loop_sched_index,&_dev_loop_stride);	
  
	
  	
  while(XOMP_sta5c_sched_next(&_dev_loop_sched_index,__final_total_iters__3__	
  -­‐	
  
1,1,_dev_loop_stride,_dev_loop_chunk_size,_dev_thread_num,_dev_thread_id,&_dev_lower,&_dev_upper))	
  
	
  	
  	
  	
  for	
  (_p___collapsed_index__7__	
  =	
  _dev_lower;	
  _p___collapsed_index__7__	
  <=	
  _dev_upper;	
  _p___collapsed_index__7__	
  
+=	
  1)	
  {	
  
	
  	
  	
  	
  	
  	
  _p_k	
  =	
  _p___collapsed_index__7__	
  /	
  __k_interval__4__	
  *	
  1	
  +	
  lb2;	
  
	
  	
  	
  	
  	
  	
  int	
  __k_remainder	
  =	
  _p___collapsed_index__7__	
  %	
  __k_interval__4__;	
  
	
  	
  	
  	
  	
  	
  _p_j	
  =	
  __k_remainder	
  /	
  __j_interval__5__	
  *	
  1	
  +	
  lb1;	
  
	
  	
  	
  	
  	
  	
  _p_i	
  =	
  __k_remainder	
  %	
  __j_interval__5__	
  *	
  1	
  +	
  lb0;	
  
	
  	
  	
  	
  	
  	
  _dev_des5na5onDataPointer[arraySize_X	
  *	
  (arraySize_Y	
  *	
  _p_k	
  +	
  _p_j)	
  +	
  _p_i]	
  =	
  _dev_sourceDataPointer[arraySize_X	
  *	
  
(arraySize_Y	
  *	
  (_p_k	
  +	
  -­‐1)	
  +	
  _p_j)	
  +	
  _p_i]	
  +	
  _dev_sourceDataPointer[arraySize_X	
  *	
  (arraySize_Y	
  *	
  (_p_k	
  +	
  1)	
  +	
  _p_j)	
  +	
  _p_i]	
  +	
  
_dev_sourceDataPointer[arraySize_X	
  *	
  (arraySize_Y	
  *	
  _p_k	
  +	
  (_p_j	
  +	
  -­‐1))	
  +	
  _p_i]	
  +	
  _dev_sourceDataPointer[arraySize_X	
  *	
  
(arraySize_Y	
  *	
  _p_k	
  +	
  (_p_j	
  +	
  1))	
  +	
  _p_i]	
  +	
  _dev_sourceDataPointer[arraySize_X	
  *	
  (arraySize_Y	
  *	
  _p_k	
  +	
  _p_j)	
  +	
  (_p_i	
  +	
  -­‐1)]	
  +	
  
_dev_sourceDataPointer[arraySize_X	
  *	
  (arraySize_Y	
  *	
  _p_k	
  +	
  _p_j)	
  +	
  (_p_i	
  +	
  1)]	
  +	
  _dev_sourceDataPointer[arraySize_X	
  *	
  
(arraySize_Y	
  *	
  _p_k	
  +	
  _p_j)	
  +	
  _p_i]	
  *	
  -­‐6.00000;	
  
	
  	
  	
  	
  }	
  	
  	
  }	
  

Contenu connexe

Tendances

F# Type Provider for R Statistical Platform
F# Type Provider for R Statistical PlatformF# Type Provider for R Statistical Platform
F# Type Provider for R Statistical PlatformHoward Mansell
 
Yang in OpenDaylight
Yang in OpenDaylightYang in OpenDaylight
Yang in OpenDaylightGunjan Patel
 
Installing & Configuring OpenLDAP (Hands On Lab)
Installing & Configuring OpenLDAP (Hands On Lab)Installing & Configuring OpenLDAP (Hands On Lab)
Installing & Configuring OpenLDAP (Hands On Lab)Michael Lamont
 
The Semantics of SPARQL
The Semantics of SPARQLThe Semantics of SPARQL
The Semantics of SPARQLOlaf Hartig
 
Semantic Integration with Apache Jena and Stanbol
Semantic Integration with Apache Jena and StanbolSemantic Integration with Apache Jena and Stanbol
Semantic Integration with Apache Jena and StanbolAll Things Open
 
2015 bioinformatics python_introduction_wim_vancriekinge_vfinal
2015 bioinformatics python_introduction_wim_vancriekinge_vfinal2015 bioinformatics python_introduction_wim_vancriekinge_vfinal
2015 bioinformatics python_introduction_wim_vancriekinge_vfinalProf. Wim Van Criekinge
 
Rest style web services (google protocol buffers) prasad nirantar
Rest style web services (google protocol buffers)   prasad nirantarRest style web services (google protocol buffers)   prasad nirantar
Rest style web services (google protocol buffers) prasad nirantarIndicThreads
 
Avro - More Than Just a Serialization Framework - CHUG - 20120416
Avro - More Than Just a Serialization Framework - CHUG - 20120416Avro - More Than Just a Serialization Framework - CHUG - 20120416
Avro - More Than Just a Serialization Framework - CHUG - 20120416Chicago Hadoop Users Group
 
Introduction to column oriented databases in PHP
Introduction to column oriented databases in PHPIntroduction to column oriented databases in PHP
Introduction to column oriented databases in PHPZend by Rogue Wave Software
 
Clojure talk at Münster JUG
Clojure talk at Münster JUGClojure talk at Münster JUG
Clojure talk at Münster JUGAlex Ott
 
Martin Odersky - Evolution of Scala
Martin Odersky - Evolution of ScalaMartin Odersky - Evolution of Scala
Martin Odersky - Evolution of ScalaScala Italy
 
OpenLDAP Replication Strategies
OpenLDAP Replication StrategiesOpenLDAP Replication Strategies
OpenLDAP Replication StrategiesGavin Henry
 
Something About Dynamic Linking
Something About Dynamic LinkingSomething About Dynamic Linking
Something About Dynamic LinkingWang Hsiangkai
 
Solr Query Parsing
Solr Query ParsingSolr Query Parsing
Solr Query ParsingErik Hatcher
 
FISL XIV - The ELF File Format and the Linux Loader
FISL XIV - The ELF File Format and the Linux LoaderFISL XIV - The ELF File Format and the Linux Loader
FISL XIV - The ELF File Format and the Linux LoaderJohn Tortugo
 
Neural Architectures for Named Entity Recognition
Neural Architectures for Named Entity RecognitionNeural Architectures for Named Entity Recognition
Neural Architectures for Named Entity RecognitionRrubaa Panchendrarajan
 

Tendances (20)

F# Type Provider for R Statistical Platform
F# Type Provider for R Statistical PlatformF# Type Provider for R Statistical Platform
F# Type Provider for R Statistical Platform
 
Yang in OpenDaylight
Yang in OpenDaylightYang in OpenDaylight
Yang in OpenDaylight
 
Installing & Configuring OpenLDAP (Hands On Lab)
Installing & Configuring OpenLDAP (Hands On Lab)Installing & Configuring OpenLDAP (Hands On Lab)
Installing & Configuring OpenLDAP (Hands On Lab)
 
The Semantics of SPARQL
The Semantics of SPARQLThe Semantics of SPARQL
The Semantics of SPARQL
 
Semantic Integration with Apache Jena and Stanbol
Semantic Integration with Apache Jena and StanbolSemantic Integration with Apache Jena and Stanbol
Semantic Integration with Apache Jena and Stanbol
 
2015 bioinformatics python_introduction_wim_vancriekinge_vfinal
2015 bioinformatics python_introduction_wim_vancriekinge_vfinal2015 bioinformatics python_introduction_wim_vancriekinge_vfinal
2015 bioinformatics python_introduction_wim_vancriekinge_vfinal
 
Rest style web services (google protocol buffers) prasad nirantar
Rest style web services (google protocol buffers)   prasad nirantarRest style web services (google protocol buffers)   prasad nirantar
Rest style web services (google protocol buffers) prasad nirantar
 
Avro - More Than Just a Serialization Framework - CHUG - 20120416
Avro - More Than Just a Serialization Framework - CHUG - 20120416Avro - More Than Just a Serialization Framework - CHUG - 20120416
Avro - More Than Just a Serialization Framework - CHUG - 20120416
 
Introduction to column oriented databases in PHP
Introduction to column oriented databases in PHPIntroduction to column oriented databases in PHP
Introduction to column oriented databases in PHP
 
Clojure talk at Münster JUG
Clojure talk at Münster JUGClojure talk at Münster JUG
Clojure talk at Münster JUG
 
Martin Odersky - Evolution of Scala
Martin Odersky - Evolution of ScalaMartin Odersky - Evolution of Scala
Martin Odersky - Evolution of Scala
 
OpenLDAP Replication Strategies
OpenLDAP Replication StrategiesOpenLDAP Replication Strategies
OpenLDAP Replication Strategies
 
ApacheCon09: Avro
ApacheCon09: AvroApacheCon09: Avro
ApacheCon09: Avro
 
Something About Dynamic Linking
Something About Dynamic LinkingSomething About Dynamic Linking
Something About Dynamic Linking
 
Avro intro
Avro introAvro intro
Avro intro
 
Solr Query Parsing
Solr Query ParsingSolr Query Parsing
Solr Query Parsing
 
FISL XIV - The ELF File Format and the Linux Loader
FISL XIV - The ELF File Format and the Linux LoaderFISL XIV - The ELF File Format and the Linux Loader
FISL XIV - The ELF File Format and the Linux Loader
 
Devoxx
DevoxxDevoxx
Devoxx
 
Introduction to Scala
Introduction to ScalaIntroduction to Scala
Introduction to Scala
 
Neural Architectures for Named Entity Recognition
Neural Architectures for Named Entity RecognitionNeural Architectures for Named Entity Recognition
Neural Architectures for Named Entity Recognition
 

Similaire à Enhancing Domain Specific Language Implementations Through Ontology

Suneel Marthi - Deep Learning with Apache Flink and DL4J
Suneel Marthi - Deep Learning with Apache Flink and DL4JSuneel Marthi - Deep Learning with Apache Flink and DL4J
Suneel Marthi - Deep Learning with Apache Flink and DL4JFlink Forward
 
Genomics Is Not Special: Towards Data Intensive Biology
Genomics Is Not Special: Towards Data Intensive BiologyGenomics Is Not Special: Towards Data Intensive Biology
Genomics Is Not Special: Towards Data Intensive BiologyUri Laserson
 
Apache Spark Workshop, Apr. 2016, Euangelos Linardos
Apache Spark Workshop, Apr. 2016, Euangelos LinardosApache Spark Workshop, Apr. 2016, Euangelos Linardos
Apache Spark Workshop, Apr. 2016, Euangelos LinardosEuangelos Linardos
 
Apache Spark sql
Apache Spark sqlApache Spark sql
Apache Spark sqlaftab alam
 
TI1220 Lecture 14: Domain-Specific Languages
TI1220 Lecture 14: Domain-Specific LanguagesTI1220 Lecture 14: Domain-Specific Languages
TI1220 Lecture 14: Domain-Specific LanguagesEelco Visser
 
PHP, The X DevAPI, and the MySQL Document Store -- Benelux PHP Confernece 2019
PHP, The X DevAPI, and the MySQL Document Store -- Benelux PHP Confernece 2019PHP, The X DevAPI, and the MySQL Document Store -- Benelux PHP Confernece 2019
PHP, The X DevAPI, and the MySQL Document Store -- Benelux PHP Confernece 2019Dave Stokes
 
PHP, The X DevAPI, and the MySQL Document Store Presented January 23rd, 20...
PHP,  The X DevAPI,  and the  MySQL Document Store Presented January 23rd, 20...PHP,  The X DevAPI,  and the  MySQL Document Store Presented January 23rd, 20...
PHP, The X DevAPI, and the MySQL Document Store Presented January 23rd, 20...Dave Stokes
 
"Data Provenance: Principles and Why it matters for BioMedical Applications"
"Data Provenance: Principles and Why it matters for BioMedical Applications""Data Provenance: Principles and Why it matters for BioMedical Applications"
"Data Provenance: Principles and Why it matters for BioMedical Applications"Pinar Alper
 
LuSql: (Quickly and easily) Getting your data from your DBMS into Lucene
LuSql: (Quickly and easily) Getting your data from your DBMS into LuceneLuSql: (Quickly and easily) Getting your data from your DBMS into Lucene
LuSql: (Quickly and easily) Getting your data from your DBMS into Luceneeby
 
これからのPerlプロダクトのかたち(YAPC::Asia 2013)
これからのPerlプロダクトのかたち(YAPC::Asia 2013)これからのPerlプロダクトのかたち(YAPC::Asia 2013)
これからのPerlプロダクトのかたち(YAPC::Asia 2013)goccy
 
Ingesting hdfs intosolrusingsparktrimmed
Ingesting hdfs intosolrusingsparktrimmedIngesting hdfs intosolrusingsparktrimmed
Ingesting hdfs intosolrusingsparktrimmedwhoschek
 
Introduction to Impala
Introduction to ImpalaIntroduction to Impala
Introduction to Impalamarkgrover
 
Integrating Deep Learning Libraries with Apache Spark
Integrating Deep Learning Libraries with Apache SparkIntegrating Deep Learning Libraries with Apache Spark
Integrating Deep Learning Libraries with Apache SparkDatabricks
 
Impala Architecture presentation
Impala Architecture presentationImpala Architecture presentation
Impala Architecture presentationhadooparchbook
 
Guglielmo iozzia - Google I/O extended dublin 2018
Guglielmo iozzia - Google  I/O extended dublin 2018Guglielmo iozzia - Google  I/O extended dublin 2018
Guglielmo iozzia - Google I/O extended dublin 2018Guglielmo Iozzia
 

Similaire à Enhancing Domain Specific Language Implementations Through Ontology (20)

Suneel Marthi - Deep Learning with Apache Flink and DL4J
Suneel Marthi - Deep Learning with Apache Flink and DL4JSuneel Marthi - Deep Learning with Apache Flink and DL4J
Suneel Marthi - Deep Learning with Apache Flink and DL4J
 
Genomics Is Not Special: Towards Data Intensive Biology
Genomics Is Not Special: Towards Data Intensive BiologyGenomics Is Not Special: Towards Data Intensive Biology
Genomics Is Not Special: Towards Data Intensive Biology
 
Apache Spark Workshop, Apr. 2016, Euangelos Linardos
Apache Spark Workshop, Apr. 2016, Euangelos LinardosApache Spark Workshop, Apr. 2016, Euangelos Linardos
Apache Spark Workshop, Apr. 2016, Euangelos Linardos
 
Apache Spark sql
Apache Spark sqlApache Spark sql
Apache Spark sql
 
TI1220 Lecture 14: Domain-Specific Languages
TI1220 Lecture 14: Domain-Specific LanguagesTI1220 Lecture 14: Domain-Specific Languages
TI1220 Lecture 14: Domain-Specific Languages
 
PHP, The X DevAPI, and the MySQL Document Store -- Benelux PHP Confernece 2019
PHP, The X DevAPI, and the MySQL Document Store -- Benelux PHP Confernece 2019PHP, The X DevAPI, and the MySQL Document Store -- Benelux PHP Confernece 2019
PHP, The X DevAPI, and the MySQL Document Store -- Benelux PHP Confernece 2019
 
PHP, The X DevAPI, and the MySQL Document Store Presented January 23rd, 20...
PHP,  The X DevAPI,  and the  MySQL Document Store Presented January 23rd, 20...PHP,  The X DevAPI,  and the  MySQL Document Store Presented January 23rd, 20...
PHP, The X DevAPI, and the MySQL Document Store Presented January 23rd, 20...
 
"Data Provenance: Principles and Why it matters for BioMedical Applications"
"Data Provenance: Principles and Why it matters for BioMedical Applications""Data Provenance: Principles and Why it matters for BioMedical Applications"
"Data Provenance: Principles and Why it matters for BioMedical Applications"
 
LuSql: (Quickly and easily) Getting your data from your DBMS into Lucene
LuSql: (Quickly and easily) Getting your data from your DBMS into LuceneLuSql: (Quickly and easily) Getting your data from your DBMS into Lucene
LuSql: (Quickly and easily) Getting your data from your DBMS into Lucene
 
これからのPerlプロダクトのかたち(YAPC::Asia 2013)
これからのPerlプロダクトのかたち(YAPC::Asia 2013)これからのPerlプロダクトのかたち(YAPC::Asia 2013)
これからのPerlプロダクトのかたち(YAPC::Asia 2013)
 
Ingesting hdfs intosolrusingsparktrimmed
Ingesting hdfs intosolrusingsparktrimmedIngesting hdfs intosolrusingsparktrimmed
Ingesting hdfs intosolrusingsparktrimmed
 
Introduction to Impala
Introduction to ImpalaIntroduction to Impala
Introduction to Impala
 
HDF-EOS Java Application Programming Interfaces
HDF-EOS Java Application Programming InterfacesHDF-EOS Java Application Programming Interfaces
HDF-EOS Java Application Programming Interfaces
 
Integrating Deep Learning Libraries with Apache Spark
Integrating Deep Learning Libraries with Apache SparkIntegrating Deep Learning Libraries with Apache Spark
Integrating Deep Learning Libraries with Apache Spark
 
Php
PhpPhp
Php
 
Php
PhpPhp
Php
 
Php
PhpPhp
Php
 
Java >= 9
Java >= 9Java >= 9
Java >= 9
 
Impala Architecture presentation
Impala Architecture presentationImpala Architecture presentation
Impala Architecture presentation
 
Guglielmo iozzia - Google I/O extended dublin 2018
Guglielmo iozzia - Google  I/O extended dublin 2018Guglielmo iozzia - Google  I/O extended dublin 2018
Guglielmo iozzia - Google I/O extended dublin 2018
 

Dernier

WSO2CON2024 - It's time to go Platformless
WSO2CON2024 - It's time to go PlatformlessWSO2CON2024 - It's time to go Platformless
WSO2CON2024 - It's time to go PlatformlessWSO2
 
Crypto Cloud Review - How To Earn Up To $500 Per DAY Of Bitcoin 100% On AutoP...
Crypto Cloud Review - How To Earn Up To $500 Per DAY Of Bitcoin 100% On AutoP...Crypto Cloud Review - How To Earn Up To $500 Per DAY Of Bitcoin 100% On AutoP...
Crypto Cloud Review - How To Earn Up To $500 Per DAY Of Bitcoin 100% On AutoP...SelfMade bd
 
Large-scale Logging Made Easy: Meetup at Deutsche Bank 2024
Large-scale Logging Made Easy: Meetup at Deutsche Bank 2024Large-scale Logging Made Easy: Meetup at Deutsche Bank 2024
Large-scale Logging Made Easy: Meetup at Deutsche Bank 2024VictoriaMetrics
 
%in Rustenburg+277-882-255-28 abortion pills for sale in Rustenburg
%in Rustenburg+277-882-255-28 abortion pills for sale in Rustenburg%in Rustenburg+277-882-255-28 abortion pills for sale in Rustenburg
%in Rustenburg+277-882-255-28 abortion pills for sale in Rustenburgmasabamasaba
 
+971565801893>>SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHAB...
+971565801893>>SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHAB...+971565801893>>SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHAB...
+971565801893>>SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHAB...Health
 
The title is not connected to what is inside
The title is not connected to what is insideThe title is not connected to what is inside
The title is not connected to what is insideshinachiaurasa2
 
%in Harare+277-882-255-28 abortion pills for sale in Harare
%in Harare+277-882-255-28 abortion pills for sale in Harare%in Harare+277-882-255-28 abortion pills for sale in Harare
%in Harare+277-882-255-28 abortion pills for sale in Hararemasabamasaba
 
%in ivory park+277-882-255-28 abortion pills for sale in ivory park
%in ivory park+277-882-255-28 abortion pills for sale in ivory park %in ivory park+277-882-255-28 abortion pills for sale in ivory park
%in ivory park+277-882-255-28 abortion pills for sale in ivory park masabamasaba
 
Announcing Codolex 2.0 from GDK Software
Announcing Codolex 2.0 from GDK SoftwareAnnouncing Codolex 2.0 from GDK Software
Announcing Codolex 2.0 from GDK SoftwareJim McKeeth
 
%+27788225528 love spells in Knoxville Psychic Readings, Attraction spells,Br...
%+27788225528 love spells in Knoxville Psychic Readings, Attraction spells,Br...%+27788225528 love spells in Knoxville Psychic Readings, Attraction spells,Br...
%+27788225528 love spells in Knoxville Psychic Readings, Attraction spells,Br...masabamasaba
 
%in Benoni+277-882-255-28 abortion pills for sale in Benoni
%in Benoni+277-882-255-28 abortion pills for sale in Benoni%in Benoni+277-882-255-28 abortion pills for sale in Benoni
%in Benoni+277-882-255-28 abortion pills for sale in Benonimasabamasaba
 
%in Soweto+277-882-255-28 abortion pills for sale in soweto
%in Soweto+277-882-255-28 abortion pills for sale in soweto%in Soweto+277-882-255-28 abortion pills for sale in soweto
%in Soweto+277-882-255-28 abortion pills for sale in sowetomasabamasaba
 
WSO2CON 2024 - Does Open Source Still Matter?
WSO2CON 2024 - Does Open Source Still Matter?WSO2CON 2024 - Does Open Source Still Matter?
WSO2CON 2024 - Does Open Source Still Matter?WSO2
 
AI & Machine Learning Presentation Template
AI & Machine Learning Presentation TemplateAI & Machine Learning Presentation Template
AI & Machine Learning Presentation TemplatePresentation.STUDIO
 
%in tembisa+277-882-255-28 abortion pills for sale in tembisa
%in tembisa+277-882-255-28 abortion pills for sale in tembisa%in tembisa+277-882-255-28 abortion pills for sale in tembisa
%in tembisa+277-882-255-28 abortion pills for sale in tembisamasabamasaba
 
%in Hazyview+277-882-255-28 abortion pills for sale in Hazyview
%in Hazyview+277-882-255-28 abortion pills for sale in Hazyview%in Hazyview+277-882-255-28 abortion pills for sale in Hazyview
%in Hazyview+277-882-255-28 abortion pills for sale in Hazyviewmasabamasaba
 
OpenChain - The Ramifications of ISO/IEC 5230 and ISO/IEC 18974 for Legal Pro...
OpenChain - The Ramifications of ISO/IEC 5230 and ISO/IEC 18974 for Legal Pro...OpenChain - The Ramifications of ISO/IEC 5230 and ISO/IEC 18974 for Legal Pro...
OpenChain - The Ramifications of ISO/IEC 5230 and ISO/IEC 18974 for Legal Pro...Shane Coughlan
 
%+27788225528 love spells in Huntington Beach Psychic Readings, Attraction sp...
%+27788225528 love spells in Huntington Beach Psychic Readings, Attraction sp...%+27788225528 love spells in Huntington Beach Psychic Readings, Attraction sp...
%+27788225528 love spells in Huntington Beach Psychic Readings, Attraction sp...masabamasaba
 
Architecture decision records - How not to get lost in the past
Architecture decision records - How not to get lost in the pastArchitecture decision records - How not to get lost in the past
Architecture decision records - How not to get lost in the pastPapp Krisztián
 

Dernier (20)

WSO2CON2024 - It's time to go Platformless
WSO2CON2024 - It's time to go PlatformlessWSO2CON2024 - It's time to go Platformless
WSO2CON2024 - It's time to go Platformless
 
Crypto Cloud Review - How To Earn Up To $500 Per DAY Of Bitcoin 100% On AutoP...
Crypto Cloud Review - How To Earn Up To $500 Per DAY Of Bitcoin 100% On AutoP...Crypto Cloud Review - How To Earn Up To $500 Per DAY Of Bitcoin 100% On AutoP...
Crypto Cloud Review - How To Earn Up To $500 Per DAY Of Bitcoin 100% On AutoP...
 
Large-scale Logging Made Easy: Meetup at Deutsche Bank 2024
Large-scale Logging Made Easy: Meetup at Deutsche Bank 2024Large-scale Logging Made Easy: Meetup at Deutsche Bank 2024
Large-scale Logging Made Easy: Meetup at Deutsche Bank 2024
 
%in Rustenburg+277-882-255-28 abortion pills for sale in Rustenburg
%in Rustenburg+277-882-255-28 abortion pills for sale in Rustenburg%in Rustenburg+277-882-255-28 abortion pills for sale in Rustenburg
%in Rustenburg+277-882-255-28 abortion pills for sale in Rustenburg
 
+971565801893>>SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHAB...
+971565801893>>SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHAB...+971565801893>>SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHAB...
+971565801893>>SAFE AND ORIGINAL ABORTION PILLS FOR SALE IN DUBAI AND ABUDHAB...
 
The title is not connected to what is inside
The title is not connected to what is insideThe title is not connected to what is inside
The title is not connected to what is inside
 
%in Harare+277-882-255-28 abortion pills for sale in Harare
%in Harare+277-882-255-28 abortion pills for sale in Harare%in Harare+277-882-255-28 abortion pills for sale in Harare
%in Harare+277-882-255-28 abortion pills for sale in Harare
 
%in ivory park+277-882-255-28 abortion pills for sale in ivory park
%in ivory park+277-882-255-28 abortion pills for sale in ivory park %in ivory park+277-882-255-28 abortion pills for sale in ivory park
%in ivory park+277-882-255-28 abortion pills for sale in ivory park
 
Announcing Codolex 2.0 from GDK Software
Announcing Codolex 2.0 from GDK SoftwareAnnouncing Codolex 2.0 from GDK Software
Announcing Codolex 2.0 from GDK Software
 
%+27788225528 love spells in Knoxville Psychic Readings, Attraction spells,Br...
%+27788225528 love spells in Knoxville Psychic Readings, Attraction spells,Br...%+27788225528 love spells in Knoxville Psychic Readings, Attraction spells,Br...
%+27788225528 love spells in Knoxville Psychic Readings, Attraction spells,Br...
 
%in Benoni+277-882-255-28 abortion pills for sale in Benoni
%in Benoni+277-882-255-28 abortion pills for sale in Benoni%in Benoni+277-882-255-28 abortion pills for sale in Benoni
%in Benoni+277-882-255-28 abortion pills for sale in Benoni
 
%in Soweto+277-882-255-28 abortion pills for sale in soweto
%in Soweto+277-882-255-28 abortion pills for sale in soweto%in Soweto+277-882-255-28 abortion pills for sale in soweto
%in Soweto+277-882-255-28 abortion pills for sale in soweto
 
WSO2CON 2024 - Does Open Source Still Matter?
WSO2CON 2024 - Does Open Source Still Matter?WSO2CON 2024 - Does Open Source Still Matter?
WSO2CON 2024 - Does Open Source Still Matter?
 
AI & Machine Learning Presentation Template
AI & Machine Learning Presentation TemplateAI & Machine Learning Presentation Template
AI & Machine Learning Presentation Template
 
Microsoft AI Transformation Partner Playbook.pdf
Microsoft AI Transformation Partner Playbook.pdfMicrosoft AI Transformation Partner Playbook.pdf
Microsoft AI Transformation Partner Playbook.pdf
 
%in tembisa+277-882-255-28 abortion pills for sale in tembisa
%in tembisa+277-882-255-28 abortion pills for sale in tembisa%in tembisa+277-882-255-28 abortion pills for sale in tembisa
%in tembisa+277-882-255-28 abortion pills for sale in tembisa
 
%in Hazyview+277-882-255-28 abortion pills for sale in Hazyview
%in Hazyview+277-882-255-28 abortion pills for sale in Hazyview%in Hazyview+277-882-255-28 abortion pills for sale in Hazyview
%in Hazyview+277-882-255-28 abortion pills for sale in Hazyview
 
OpenChain - The Ramifications of ISO/IEC 5230 and ISO/IEC 18974 for Legal Pro...
OpenChain - The Ramifications of ISO/IEC 5230 and ISO/IEC 18974 for Legal Pro...OpenChain - The Ramifications of ISO/IEC 5230 and ISO/IEC 18974 for Legal Pro...
OpenChain - The Ramifications of ISO/IEC 5230 and ISO/IEC 18974 for Legal Pro...
 
%+27788225528 love spells in Huntington Beach Psychic Readings, Attraction sp...
%+27788225528 love spells in Huntington Beach Psychic Readings, Attraction sp...%+27788225528 love spells in Huntington Beach Psychic Readings, Attraction sp...
%+27788225528 love spells in Huntington Beach Psychic Readings, Attraction sp...
 
Architecture decision records - How not to get lost in the past
Architecture decision records - How not to get lost in the pastArchitecture decision records - How not to get lost in the past
Architecture decision records - How not to get lost in the past
 

Enhancing Domain Specific Language Implementations Through Ontology

  • 1. This work was performed under the auspices of the U.S. Department of Energy by Lawrence Livermore National Laboratory under contract DE-AC52-07NA27344. Lawrence Livermore National Security, LLC Enhancing Domain Specific Language Implementations Through Ontology WOLFHPC15: Fifth International Workshop on Domain-Specific Languages and High-Level Frameworks for High Performance Computing Chunhua “Leo” Liao, Pei-Hung Lin, Daniel J. Quinlan, Yue Zhao*, and Xipeng Shen* Nov. 16, 2015 LLNL-PRES-679077 *
  • 2. 2 § Motivation § Methodology — Ontology-based knowledge base — Compiler/runtime interface § Evaluation — Ontology for Stencil computation — Compiler implementation — Preliminary results § Related work § Discussion Outline
  • 3. 3 § DSLs: attractive to program HPC machines — High-level annotations with rich semantics to efficiently express domain algorithms — Performance: DSL implementations (compilers + runtime systems) § Semantic gap: Large body of knowledge is required to build efficient implementations — Application domain optimization knowledge — Language semantics: DSLs and host languages — Library interface semantics — Hardware architecture features § Current problem: all the knowledge is implicitly assumed or represented using ad-hoc approaches — informal, not reusable, not scalable § Our solution: adapt modern knowledge-engineering method to enhance DSL implementations Motivation
  • 4. 4 § Application domains — Data: grid, points, stencil, halo, … — Algorithms: Iterative algorithm, convergence § Language semantics: general-purpose languages and DSLs — non-aliasing, non-overlapping, fixed-sized vs resizable — functions: read/write variable sets, pure, virtual § Library knowledge: standard and domain-specific — Containers: unique, ordered, sorted, continuous storage — Iterators: random access, bidirectional § HPC hardware architecture configurations — CPUs: number of cores, cache sizes — NVIDIA GPU: shared memory, global memory, constant memory What knowledge? Application Domain Hardware Languages (DSL, GPLs) Libraries Runtimes Operating System HPC software/hardware stack
  • 5. 5 DSL knowledge is often hard to extract, critical to performance § High-level abstractions in DSL — Encouraging code reuse and hiding implementation from interfaces to reduce software complexity — Functions, data structures, classes and templates … — Could be standard or user-defined/domain-specific § Semantics of high-level abstractions — Critical to optimizations, including parallelization • Read-only semantics — Hard to be extracted by static analysis • STL vector implementation ?→? elements are contiguous in memory § Conventional compilers lose track of abstractions — Analyses and optimizations are mostly done on top of middle or low level IR — Hard to trace back to the high-level abstractions represented in source level
  • 6. 6 § Informal and Ad-Hoc § Isolated: software and hardware knowledge are managed separately § Hard to reuse, not scalable § No toolchain support: — Parser: each solution has its own parser — Lack of IDE, validation tools, etc. § Not easy to share among DSL designers, domain experts and DSL developers Problems with current HPC knowledge management approaches class std::vector<T> { alias none; overlap none; //elements are alias-free and non-overlapping is_fixed_sized_array { //semantic-preserving functions as a fixed-sized array length(i) = {this.size()}; element(i) = {this.operator[](i); this.at(i);}; }; }; void my_processing(SgNode* func_def) { read{func_def}; modify {func_def}; //side effects of a function } std::list<SgFunctionDef*> findCFunctionDefinition(SgNode* root){ read {root}; modify {result}; return unique; //return a unique set } void Compass::OutputObject::addOutput(SgNode* node){ read {node}; //order-independent side effects modify {Compass::OutputObject::outputList<order_independent>}; } Semantic specification file Liao’10 Memory Specification Language: Chen’14
  • 7. 7 A new ontology-driven DSL implementation paradigm Application Domains Domain Specific Languages General-Purpose Languages Libraries Hardware Architectures A Knowledge Base using OWL ---------- [upper ontology] ... [domain ontologies] Relevant knowledge domains ... Compiler Runtime DSL Implementations Domain Experts DSL Designers DSL developers Architects ... End-user tools/ APIs
  • 8. 8 § Definitions: — Philosophy: the study of nature of beings and their relations — Modern: a formal specification for explicitly representing knowledge of the entities in a domain § Provides a common vocabulary in a domain — Concepts, properties, and individuals § Theory foundation: description logics (DLs), a family of logical languages for knowledge representation — several dialects with different expressiveness and efficiencies (for reasoning) Central component: ontology-based knowledge base
  • 9. 9 § Web Ontology Language (OWL): one of the most popular ontology languages — Classes (or concepts): denote sets of individuals, organized in a hierarchy — Individuals (or instances): single instances in the domain — Properties (or relations): binary relations between entities — Maturing tool chain since ’04 by W3C: Protégé IDE, OWL API, FaCT++, SWI-Prolog, etc. Web Ontology Language
  • 10. 10   Example  family  ontology  using  OWL   Person Man Women John Mary instanceOf subClassOf subClassOf instanceOf hasWife hasHusband Prefix(:=<http://example.com/owl/families/>) Ontology(<http://example.com/owl/families> Declaration( NamedIndividual( :John ) ) Declaration( NamedIndividual( :Mary ) ) Declaration( Class( :Person ) ) Declaration( Class( :Woman ) ) Declaration( ObjectProperty( :hasWife ) ) Declaration( ObjectProperty( :hasSpouse) ) SubClassOf( :Woman :Person ) SubClassOf( :Man :Person ) ClassAssertion(:Man :John) ClassAssertion(:Women :Mary) SubObjectPropertyOf( :hasWife :hasSpouse ) ObjectPropertyAssertion( :hasWife :John :Mary ) ... ) Family ontology in functional syntax
  • 11. 11 Software and hardware ontology for DSL implementations Hardware Cplusplus Program owl:Thing Processor Memory Type Statement Expression CPU GPU Variable .... MainMemory Cache Xeon Kepler .... IterationStatement AssignmentStatement DeclarationStatement .... Library § Requirements: — common vocabulary — intuitive, efficient to facilitate DSL analyses and optimizations § Software: based on C and C++ language standards — Individuals: use international resource identifiers (IRIs) – “http://example.com/owl/CProgram:Type” • Scoped IRI - qualified name + relative location • E.g.: file.c::foo()::1:6,1:10 std::vector<T>, std::vector<int>, § Hardware: focus on single machine for now — Individuals: machine1, cpu2, memory, identified by names or serial numbers § Space Efficiency - core knowledge base + loadable supplemental modules (for individuals) Properties: aliasing, read, write, hasType, hasScope, calledBy, … Properties: sub class of, part of , frequency, has-a, …
  • 12. 12   Excerpt  of  SoBware  and  Hardware  Ontology  with  Individuals   % program concepts and their hierarchy Class(:PointerType) SubClassOf(:PointerType :DerivedType) Class(:BinaryOp) Class(:AddOp) SubClassOf(:AddOp :BinaryOp) SubClassOf(:SelectionStatement :Statement) SubClassOf(:IfStatement :SelectionStatement) % define the relations between program constructs ObjectProperty(:hasScope) ObjectProperty(:hasType) ObjectProperty( :alias) ObjectProperty(:read) % variables identified by a qualified names NamedIndividual ( :var1) NamedIndividual ( :var2) % two pointer variables alias each other ObjectPropertyAssertion(:hasType :var1 :PointerType) … ObjectPropertyAssertion (:alias :var1 :var2) % hardware concepts and hierarchy Class(:Xeon) Class(:CPU) SubClassOf(:Xeon :CPU ) Class( :X5680 ) SubClassOf( :X5680 :Xeon ) ClassAssertion(:X5680 :cpu1) Class(:GPU) SubClassOf(:Quadro_4000 :GPU) SubClassOf(:Quadro_4000 DataHasValue(:numberOfCores "256")) % Individuals NamedIndividual(:gpu1) NamedIndividual(:cpu1) ClassAssertion(:Quadro_4000 :gpu1) % a workstation with CPU and GPU NamedIndividual(:tux322) ClassAssertion(:computer :tux322) ObjectPropertyAssertion(:hasPart :tux322 :gpu1) ObjectPropertyAssertion(:hasPart :tux322 :cpu2)
  • 13. 13 § Requirements — Bidirectional: query + insertion — Support both on-disk and in-memory storage § Knowledge representation: OWL — On disk as OWL files — In-memory storage: SWI-Prolog predicates § Two kinds of interfaces — Prolog reasoning engine: powerful declarative Prolog queries operating on in-memory storage • Learning curve — Prebuilt C/C++ query interface: light weight operating on OWL files, easy for deployment Compiler and runtime interface of ontology-based knowledge base % Prolog query interfaces % Define additional predicates checkClass(IndividualL, ClassL) :- rdf(IndividualL, rdf:type, ClassL). % recursively check through subclasses checkClass(IndividualL, ClassL) :- rdf(SubClassL, rdfs:subClassOf, ClassL), checkClass(IndividualL, SubClassL). % if individual tux322 is a computer? ?- checkClass (‘tux322’, ‘:computer’) % What are the variables written by function bar? ?- Function(X),hasName(X,'bar'),writtenBy(Vars,X) // C++ query interface std::string getMachineName (string machine_IRI); vector<std::string> getCPUs (string machine_name); vector<std::string> getGPUs (string machine_name); int getCoreCount (string gpu_IRI);
  • 14. 14 Evaluating the benefits of ontology for DSL implementations Ontology-based knowledge base for DSL implementations ROSE Frontend Knowledge Generator DSL Program Ontology-Based Knowledge Base (DSL, Program, Hardware) AST AutoPar PolyOpt Protege Domain experts DSL designers Programmers Architects C++ Program ROSE Unparser DSL Lowering SWI-Prolog Semantic Web Library ROSE Midend § The DSL: Shift Calculus — Light-weight embedded DSL — Host language: C++ — Leverage Chombo AMR library [Colella’09] § The implementation: a ShiftCalculus source-to-source translator — Built on top of ROSE, a source-to-source compiler infrastructure developed at LLNL — Connecting to ontology HOMP (CUDA Generation) OpenMP Lowering
  • 15. 15 ShiftCalculus DSL syntax and semantics
  • 16. 16   ShiBCalculus  DSL  for  stencil  computa5on   #define DIM 3 int main(int argc, char* argv[]) { Point lo, hi; // Space discretization Box bxdest(lo,hi); Box bxsrc=bxdest.grow(1); ... // Source and destination data containers RectMDArray<double,1> Asrc(bxsrc); RectMDArray<double,1> Adest(bxdest); ... double ident, C0; // Shift and Stencil declarations array<Shift,DIM> shft_vec = getShiftVec(); Stencil<double> laplace = C0*(shft_vec^zero); // Stencil formation using Shift for (int dir=0;dir<DIM;dir++) { Point thishft = getUnitv(dir); laplace = laplace + ident*(shft_vec^thishft); laplace = laplace + ident*(shft_vec^(thishft*(-1))); } // Apply stencil computation using data containers in space Stencil<double>::apply(laplace, Asrc, Adest, bxdest); } Constructing stencil at the initial point (red cell in the figure). CO is the coefficient applied to this stencil cell. Iteratively constructing stencil for the surrounding points (green cells in figure). “ident” is the coefficient applied to these stencil cells. Applying the stencil, source data, destination data, and destination box to the computation. Destination box represents the loops in the computation. 3-D 7-point stencil
  • 17. 17   Generated  sequen5al  C++  output  for  Laplacian  example   1 int main (int argc , char * argv []) 2 { 3 ... 4 const class Point lo( zero ); 5 const class Point hi = getOnes () * adjustedBlockSize ; 6 const class Box bxdest (lo ,hi); 7 const class Box bxsrc = bxdest . grow (1) ; 8 class RectMDArray < double , 1 , 1 , 1 > Asrc ( bxsrc ); 9 class RectMDArray < double , 1 , 1 , 1 > Adest ( bxdest ); 10 const double ident = 1.0; 11 const double C0 = -6.00000; 12 ... 13 double * sourceDataPointer = Asrc.getPointer (); 14 double * destinationDataPointer = Adest.getPointer (); 15 for (k = lb2; k < ub2; ++k) { 16 for (j = lb1; j < ub1; ++j) { 17 for (i = lb0; i < ub0; ++i) { 18 destinationDataPointer [ arraySize_X * ( arraySize_Y * k + j) + i] = sourceDataPointer [ arraySize_X * ( arraySize_Y * (k + -1) + j)+ i] + sourceDataPointer [ arraySize_X * ( arraySize_Y * (k + 1)+ j) + i] + sourceDataPointer [ arraySize_X * ( arraySize_Y * k+ (j + -1)) + i] + sourceDataPointer [ arraySize_X * (arraySize_Y * k + (j + 1)) + i] + sourceDataPointer [ arraySize_X * ( arraySize_Y * k + j) + (i + -1)] + sourceDataPointer [ arraySize_X * ( arraySize_Y * k + j) + (i +1)] + sourceDataPointer [ arraySize_X * ( arraySize_Y * k + j) +i] * -6.00000; … 22 } } } } More efficient code generation ( parallelization, vectorization, loop tiling, GPU porting, etc.) need: 1.  Semantics of high level constructs double * RectMDArray<>::getPointer() 2. Hardware details for target platforms CPU, GPU, cache, memory size, etc.
  • 18. 18   Connection between Ontology and DSL implementations Shift Calculus DSL Hardware CplusplusProgram ChomboLibrary owl:Thing Processor Memory Type Statement Expression Stencil Chombo::Box CPU GPU RectMDArray Variable .... MainMemory Cache Xeon Kepler .... .... IterationStatement AssignmentStatement DeclarationStatement Shift Point .... Coefficient Software semantics to enable parallelization HW specification In code generation ObjectProperty •  alias, read, write, …. DataProperty •  Frequency, cache size
  • 19. 19 § Knowledge generation: traverse AST to generate a program’s concrete individuals and relation instances — Individual language constructs: • functions, loops, types, variable references, statements, function calls, etc. — Relations: • Structural: hasName, hasParent, hasScope, calledBy, returnedBy …. • Analysis: alias, access (read, write), overlap, dependent (true, anti, output), … § Semantics propagation — Across different levels of IRs: transformation tracking • A special model: no memory reuse for AST nodes to ensure unique IDs • API functions to store input nodes for a generated node: m:n mapping • Update knowledge base with subClassOf(:low-level-entity :high-level-entity) to connect these entities • Queries on low-level entities can return semantics associated with high-level entities. — Function calls: a = foo(b): read/write formal parameters → actual parameters More details about compiler interactions with ontology
  • 20. 20 Output Variants Compiler/ Optimizations Execution time (sec.) C++ serial Baseline 1.51482 C++ OpenMP Parallel AutoPar 0.380562 C++ Polyhedral Tiled+parallel PolyOpt 0.503307 CUDA w/ data transfer HOMP 9.29446 CUDA w/o data transfer HOMP 3.14713e-05 Preliminary results § Configuration of testing platform: — 24-core workstation with Intel Xeon CPU E5-2620 V.3 and 64 GB memory. — GCC version 4.8.3, NVCC compiler version 7.0.
  • 21. 21 § Ontology-based knowledge bases — General purpose: Cyc[Matuszek’06], SUMO[Pease’02] — Domain-specific: Gene ontology[Ashburner’00] (Biology), KnowRob [Tenorth’09](service robots), Human behaviors[Rodriguez’14] § Knowledge engineering methodologies applied to DSL analysis and design — Ontology-based domain analysis: [Tairas’09], — Translating Ontology to DSL grammars: [Ceh’11] — Domain-specific modeling languages: [Brauer’08], [Walter’09] § Our work: first to use ontology to enhance DSL implementations Related work
  • 22. 22 § A novel ontology-based knowledge base to enhance DSL implementations — Enable easy knowledge accumulation, reuse and sharing among software and human users — Formal, explicit and uniform format using OWL — Initial concepts and relations modeled for software and hardware domains — Bidirectional connection to compilers — Prototype ShiftCalculus DSL implementation enabled by ontology § Ongoing/Future work: — Collecting and representing stencil optimization knowledge — Context-aware optimization advisors — Declarative, generic program analysis and transformation Discussion
  • 24. 24   Int main(){ … { int c0; int c3; int c4; int c5; int c1; int c2; if (lb0 <= ub0 + -1 && lb1 <= ub1 + -1 && lb2 <= ub2 + -1) { #pragma omp parallel for private(c2, c1, c5, c4, c3) for (c0 = ((lb2 + -31) * 32 < 0?-(-(lb2 + -31) / 32) : ((32 < 0?(-(lb2 + -31) + - 32 - 1) / - 32 : (lb2 + -31 + 32 - 1) / 32))); c0 <= (((ub2 + -1) * 32 < 0?((32 < 0?-((-(ub2 + -1) + 32 + 1) / 32) : -((-(ub2 + -1) + 32 - 1) / 32))) : (ub2 + -1) / 32)); c0++) { for (c1 = ((lb1 + -31) * 32 < 0?-(-(lb1 + -31) / 32) : ((32 < 0?(-(lb1 + -31) + - 32 - 1) / - 32 : (lb1 + -31 + 32 - 1) / 32))); c1 <= (((ub1 + -1) * 32 < 0?((32 < 0?-((-(ub1 + -1) + 32 + 1) / 32) : -((-(ub1 + -1) + 32 - 1) / 32))) : (ub1 + -1) / 32)); c1++) { for (c2 = ((lb0 + -17) * 18 < 0?-(-(lb0 + -17) / 18) : ((18 < 0?(-(lb0 + -17) + - 18 - 1) / - 18 : (lb0 + -17 + 18 - 1) / 18))); c2 <= (((ub0 + -1) * 18 < 0?((18 < 0?-((-(ub0 + -1) + 18 + 1) / 18) : -((-(ub0 + -1) + 18 - 1) / 18))) : (ub0 + -1) / 18)); c2++) { for (c3 = (32 * c0 > lb2?32 * c0 : lb2); c3 <= ((32 * c0 + 31 < ub2 + -1?32 * c0 + 31 : ub2 + -1)); c3++) { for (c4 = (32 * c1 > lb1?32 * c1 : lb1); c4 <= ((32 * c1 + 31 < ub1 + -1?32 * c1 + 31 : ub1 + -1)); c4++) { #pragma ivdep #pragma vector always #pragma simd for (c5 = (18 * c2 > lb0?18 * c2 : lb0); c5 <= ((18 * c2 + 17 < ub0 + -1?18 * c2 + 17 : ub0 + -1)); c5++) {/* tiled for 3 dims */ destinationDataPointer[c3][c4][c5] = sourceDataPointer[c3 + - 1][c4][c5] + sourceDataPointer[c3 + 1][c4][c5] + sourceDataPointer[c3][c4 + - 1][c5] + sourceDataPointer[c3][c4 + 1][c5] + sourceDataPointer[c3][c4][c5 + - 1] + sourceDataPointer[c3][c4][c5 + 1] + sourceDataPointer[c3][c4][c5] * - 6.00000; … } } } } } } } } } Tiled  &  parallelized  (w/  OMP  and  SIMD)  C++  output  
  • 25. 25   HOST code: int main(int argc,char *argv[]) {… { double *_dev_sourceDataPointer; int _dev_sourceDataPointer_size = sizeof(double ) * (1764 - 0); _dev_sourceDataPointer = ((double *)(xomp_deviceMalloc(_dev_sourceDataPointer_size))); xomp_memcpyHostToDevice(((void *)_dev_sourceDataPointer),((const void *)sourceDataPointer),_dev_sourceDataPointer_size); double *_dev_destinationDataPointer; int _dev_destinationDataPointer_size = sizeof(double ) * (1764 - 0); _dev_destinationDataPointer = ((double *) (xomp_deviceMalloc(_dev_destinationDataPointer_size))); int _threads_per_block_ = xomp_get_maxThreadsPerBlock(); int _num_blocks_ = xomp_get_max1DBlock(__final_total_iters__3__ - 1 - 0 + 1); OUT__1__1527__<<<_num_blocks_,_threads_per_block_>>>(__final_total_iters__3__,__k_interval__4__,__j_interval__5__,_dev_sourceDataPointer,_dev_destinationDataPointer); xomp_freeDevice(_dev_sourceDataPointer); xomp_memcpyDeviceToHost(((void *)destinationDataPointer),((const void *)_dev_destinationDataPointer),_dev_destinationDataPointer_size); xomp_freeDevice(_dev_destinationDataPointer); } } Generated CUDA output Device code: extern "C" __global__ void OUT__1__1527__(int __final_total_iters__3__,int __k_interval__4__,int __j_interval__5__,double *_dev_sourceDataPointer,double *_dev_destinationDataPointer) { int _p_k; int _p_j; int _p_i; int _p___collapsed_index__7__; int _dev_lower; int _dev_upper; int _dev_loop_chunk_size; int _dev_loop_sched_index; int _dev_loop_stride; int _dev_thread_num = getCUDABlockThreadCount(1); int _dev_thread_id = getLoopIndexFromCUDAVariables(1); XOMP_static_sched_init(0,__final_total_iters__3__ - 1,1,1,_dev_thread_num,_dev_thread_id,&_dev_loop_chunk_size,&_dev_loop_sched_index,&_dev_loop_stride); while(XOMP_static_sched_next(&_dev_loop_sched_index,__final_total_iters__3__ - 
1,1,_dev_loop_stride,_dev_loop_chunk_size,_dev_thread_num,_dev_thread_id,&_dev_lower,&_dev_upper)) for (_p___collapsed_index__7__ = _dev_lower; _p___collapsed_index__7__ <= _dev_upper; _p___collapsed_index__7__ += 1) { _p_k = _p___collapsed_index__7__ / __k_interval__4__ * 1 + lb2; int __k_remainder = _p___collapsed_index__7__ % __k_interval__4__; _p_j = __k_remainder / __j_interval__5__ * 1 + lb1; _p_i = __k_remainder % __j_interval__5__ * 1 + lb0; _dev_destinationDataPointer[arraySize_X * (arraySize_Y * _p_k + _p_j) + _p_i] = _dev_sourceDataPointer[arraySize_X * (arraySize_Y * (_p_k + -1) + _p_j) + _p_i] + _dev_sourceDataPointer[arraySize_X * (arraySize_Y * (_p_k + 1) + _p_j) + _p_i] + _dev_sourceDataPointer[arraySize_X * (arraySize_Y * _p_k + (_p_j + -1)) + _p_i] + _dev_sourceDataPointer[arraySize_X * (arraySize_Y * _p_k + (_p_j + 1)) + _p_i] + _dev_sourceDataPointer[arraySize_X * (arraySize_Y * _p_k + _p_j) + (_p_i + -1)] + _dev_sourceDataPointer[arraySize_X * (arraySize_Y * _p_k + _p_j) + (_p_i + 1)] + _dev_sourceDataPointer[arraySize_X * (arraySize_Y * _p_k + _p_j) + _p_i] * -6.00000; } }