SlideShare une entreprise Scribd logo
I Workshop on command-
line tools
(day 2)
Center for Applied Genomics
Children's Hospital of Philadelphia
February 12-13, 2015
awk - a powerful way to check conditions
and show specific columns
Example: show only CNVs that use at most 3
targets (exons)
tail -n +2 DATA.xcnv | awk '$8 <= 3'
awk - different ways to do the same thing
tail -n +2 DATA.xcnv | awk '$8 <= 3'
# same effect 1
tail -n +2 DATA.xcnv | awk '$8 <= 3 {print}'
# same effect 2
tail -n +2 DATA.xcnv | awk '{if ($8 <= 3) {print}}'
# same effect 3
tail -n +2 DATA.xcnv | awk '{if ($8 <= 3) {print $0}}'
# different effect
tail -n +2 DATA.xcnv | awk '{if ($8 <= 3) {print $1}}'
awk - more options on if statement
# Applying XHMM "gold" thresholds (KB >= 1,
# NUM_TARG >= 3, Q_SOME >= 65, Q_NON_DIPLOID >= 65)
tail -n +2 DATA.xcnv | \
awk '$4 >= 1 && $8 >= 3 && $10 >= 65 && $11 >= 65' \
> DATA.gold.xcnv
# Using only awk
awk 'NR > 1 && $4 >= 1 && $8 >= 3 &&
$10 >= 65 && $11 >= 65' DATA.xcnv > DATA.gold2.xcnv
diff - compare files line by line
# Compare
diff DATA.gold.xcnv DATA.gold2.xcnv
# Tip: install tkdiff to use a
# graphic version of diff
Exercises
1. Using adhd.map, show 10 SNPs with rsID starting with 'rs' on
chrom. 2, between positions 1Mb and 2Mb
2. Check which chromosome has the most SNPs
3. Check which SNP IDs are duplicated
Suggestions
# 1.
grep '\brs' adhd.map | \
awk '$1 == 2 && int($4) >= 1000000 && int($4) <= 2000000' | \
less
# 2.
cut -f1 adhd.map | sort | uniq -c | sort -k1n | tail -1
# 3.
cut -f2 adhd.map | sort | uniq -c | awk '$1 > 1'
More awk - inserting external variables
awk -v Mb=1000000 -v chrom=2 \
'$1 == chrom && int($4) >= Mb && int($4) <= 2*Mb' \
adhd.map | less
# Printing specific columns
awk -v Mb=1000000 -v chrom=2 \
'$1 == chrom && int($4) >= Mb && int($4) <= 2*Mb {print $1" "$2" "$4}' \
adhd.map | less
Using awk to check number of variants
in ped files
# Options using only awk, but they take (much) more time
awk 'NR == 1 {print (NF-6)/2}' adhd.ped
awk 'NR < 2 {print (NF-6)/2}' adhd.ped # Slow, too
# Better alternative
head -n 1 adhd.ped | awk '{print (NF-6)/2}'
# Now, the map file
wc -l adhd.map
time - time command execution
time head -n 1 adhd.ped | awk '{print (NF-6)/2}'
real 0m0.485s
user 0m0.391s
sys 0m0.064s
time awk 'NR < 2 {print (NF-6)/2}' adhd.ped
# Forget it… just press Ctrl+C
real 1m0.611s
user 0m51.261s
sys 0m0.826s
top - display and update sorted information
about processes / display Linux tasks
top
z : color
k : kill process
u : choose specific user
c : show complete commands running
1 : show usage of individual CPUs
q : quit
screen - screen manager with terminal emulation (i)
screen
screen -S <session_name>
Ctrl+a, then c: create window
Ctrl+a, then n: go to next window
Ctrl+a, then p: go to previous window
Ctrl+a, then 0: go to window number 0
Ctrl+a, then d: detach from your session, but keep it running
screen - screen manager with terminal emulation (ii)
Ctrl+a, then [ : activate copy mode (to scroll screen)
q : quit copy mode
exit : close current window
screen -r : resume the only detached session
screen -r <session_name> : resume a specific
detached session
screen -rD <session_name> : reattach a session (detaching it elsewhere first, if necessary)
split - split a file into pieces
split -l <lines_of_each_piece> <input> <prefix>
# Example
split -l 100000 adhd.map map_
wc -l map_*
in-line Perl/sed to find and replace (i)
head DATA.gold.xcnv | cut -f3 | perl -pe 's/chr/CHR/g'
head DATA.gold.xcnv | cut -f3 | perl -pe 's/chr//g'
# Other possibilities
head DATA.gold.xcnv | cut -f3 | perl -pe 's|chr||g'
head DATA.gold.xcnv | cut -f3 | perl -pe 's!chr!!g'
head DATA.gold.xcnv | cut -f3 | sed 's/chr//g'
# Creating a BED file
head DATA.gold.xcnv | cut -f3 | perl -pe 's/[:-]/\t/g'
in-line Perl/sed to find and replace (ii)
# "s" means substitute
# "g" means global (replace all matches, not only first)
# See the difference...
head DATA.gold.xcnv | cut -f3 | sed 's/9/nine/g'
head DATA.gold.xcnv | cut -f3 | sed 's/9/nine/'
# Adding more replacements
head DATA.gold.xcnv | cut -f3 | sed 's/1/one/g; s/2/two/g'
copy from terminal to clipboard/
paste from clipboard to terminal
# This is like Ctrl+V in your terminal
pbpaste
# This is like Ctrl+C from your terminal
head DATA.xcnv | pbcopy
# Then, press Ctrl+V in another text editor
# On Linux, you can install "xclip"
http://sourceforge.net/projects/xclip/
datamash - command-line calculations
tail -n +2 DATA.xcnv | 
head | 
cut -f6,10,11 | 
datamash mean 1 sum 2 min 3
# mean of 1st column
# sum of 2nd column
# minimum of 3rd column
http://www.gnu.org/software/datamash/
touch - change file access and
modification times
ls -lh DATA.gold.xcnv
touch DATA.gold.xcnv
ls -lh DATA.gold.xcnv
Introduction to "for" loop
tail -n +2 DATA.xcnv | cut -f1 | sort | uniq | head > samples.txt
for sample in `cat samples.txt`; do touch $sample.txt; done
ls -lh Sample*
for sample in `cat samples.txt`; do
mv $sample.txt $sample.csv;
done
Variables (i)
i=1
name=Leandro
count=`wc -l adhd.map`
echo $i
echo $name
echo $count
Variables (ii)
# Examples
bwa=/home/users/llima/tools/bwa
hg19=/references/hg19.fasta
# Do not run
$bwa index $hg19
System variables
echo $HOME
echo $USER
echo $PWD
# directory where bash looks for your programs
echo $PATH
Exercise
1. Create a program that shows input
parameters/arguments
2. Create a program (say, "fields", or
"colnames") that prints the column names of
a <tab>-delimited file (example: DATA.xcnv)
3. Send this program to your PATH
Running a bash script (i)
cat > arguments.sh
echo Your program is $0
echo Your first argument is $1
echo Your second argument is $2
echo You entered $# parameters.
# Ctrl+D (end of input) to finish and exit "cat"
Running a bash script (ii)
bash arguments.sh
bash arguments.sh A B C D E
ls -lh arguments.sh
-rw-r--r--
# First character
b Block special file.
c Character special file.
d Directory.
l Symbolic link.
s Socket link.
p FIFO.
- Regular file.
chmod - set permissions (i)
Next characters
user, group, others | read, write, execute
ls -lh arguments.sh
-rw-r--r--
# Everybody can read
# Only user can write/modify
chmod - set permissions (ii)
# Add writing permission to group
chmod g+w arguments.sh
ls -lh arguments.sh
# Remove writing permission from group
chmod g-w arguments.sh
ls -lh arguments.sh
# Add execution permission to all
chmod a+x arguments.sh
ls -lh arguments.sh
chmod - set permissions (iii)
# Run the script directly (it now has execution permission)
./arguments.sh
./arguments.sh A B C D E
# change the name
mv arguments.sh arguments
# Send to your PATH (showing on Mac)
sudo cp arguments /usr/local/bin/
# Go to other directory
# Type argu<Tab>, and "which arguments"
Run your program again

Contenu connexe

Tendances

Tracing and awk in ns2
Tracing and awk in ns2Tracing and awk in ns2
Tracing and awk in ns2
Pradeep Kumar TS
 
This is not your father's monitoring.
This is not your father's monitoring.This is not your father's monitoring.
This is not your father's monitoring.
Mathias Herberts
 
Shell Script to Extract IP Address, MAC Address Information
Shell Script to Extract IP Address, MAC Address InformationShell Script to Extract IP Address, MAC Address Information
Shell Script to Extract IP Address, MAC Address Information
VCP Muthukrishna
 
Cs757 ns2-tutorial-exercise
Cs757 ns2-tutorial-exerciseCs757 ns2-tutorial-exercise
Cs757 ns2-tutorial-exercise
Pratik Joshi
 
Linux Shell Scripts and Shell Commands✌️
Linux Shell Scripts and Shell Commands✌️Linux Shell Scripts and Shell Commands✌️
Linux Shell Scripts and Shell Commands✌️
Nazmul Hyder
 
KubeCon EU 2016: Custom Volume Plugins
KubeCon EU 2016: Custom Volume PluginsKubeCon EU 2016: Custom Volume Plugins
KubeCon EU 2016: Custom Volume Plugins
KubeAcademy
 
Artimon - Apache Flume (incubating) NYC Meetup 20111108
Artimon - Apache Flume (incubating) NYC Meetup 20111108Artimon - Apache Flume (incubating) NYC Meetup 20111108
Artimon - Apache Flume (incubating) NYC Meetup 20111108
Mathias Herberts
 
Nosql hands on handout 04
Nosql hands on handout 04Nosql hands on handout 04
Nosql hands on handout 04
Krishna Sankar
 
Bash Scripting Workshop
Bash Scripting WorkshopBash Scripting Workshop
Bash Scripting Workshop
Ahmed Magdy Ezzeldin, MSc.
 
"PostgreSQL and Python" Lightning Talk @EuroPython2014
"PostgreSQL and Python" Lightning Talk @EuroPython2014"PostgreSQL and Python" Lightning Talk @EuroPython2014
"PostgreSQL and Python" Lightning Talk @EuroPython2014
Henning Jacobs
 
Bash Script Disk Space Utilization Report and EMail
Bash Script Disk Space Utilization Report and EMailBash Script Disk Space Utilization Report and EMail
Bash Script Disk Space Utilization Report and EMail
VCP Muthukrishna
 
Dtalk shell
Dtalk shellDtalk shell
Dtalk shell
Miha Mencin
 
File Space Usage Information and EMail Report - Shell Script
File Space Usage Information and EMail Report - Shell ScriptFile Space Usage Information and EMail Report - Shell Script
File Space Usage Information and EMail Report - Shell Script
VCP Muthukrishna
 
2015 555 kharchenko_ppt
2015 555 kharchenko_ppt2015 555 kharchenko_ppt
2015 555 kharchenko_ppt
Maxym Kharchenko
 
Python postgre sql a wonderful wedding
Python postgre sql   a wonderful weddingPython postgre sql   a wonderful wedding
Python postgre sql a wonderful wedding
Stéphane Wirtel
 
Coming Out Of Your Shell - A Comparison of *Nix Shells
Coming Out Of Your Shell - A Comparison of *Nix ShellsComing Out Of Your Shell - A Comparison of *Nix Shells
Coming Out Of Your Shell - A Comparison of *Nix Shells
Kel Cecil
 
dplyr
dplyrdplyr
Neatly Hashing a Tree: FP tree-fold in Perl5 & Perl6
Neatly Hashing a Tree: FP tree-fold in Perl5 & Perl6Neatly Hashing a Tree: FP tree-fold in Perl5 & Perl6
Neatly Hashing a Tree: FP tree-fold in Perl5 & Perl6
Workhorse Computing
 
How-to Integração Postfi
How-to Integração PostfiHow-to Integração Postfi
How-to Integração Postfi
Thiago Cavalcante
 
Process monitoring in UNIX shell scripting
Process monitoring in UNIX shell scriptingProcess monitoring in UNIX shell scripting
Process monitoring in UNIX shell scripting
Dan Morrill
 

Tendances (20)

Tracing and awk in ns2
Tracing and awk in ns2Tracing and awk in ns2
Tracing and awk in ns2
 
This is not your father's monitoring.
This is not your father's monitoring.This is not your father's monitoring.
This is not your father's monitoring.
 
Shell Script to Extract IP Address, MAC Address Information
Shell Script to Extract IP Address, MAC Address InformationShell Script to Extract IP Address, MAC Address Information
Shell Script to Extract IP Address, MAC Address Information
 
Cs757 ns2-tutorial-exercise
Cs757 ns2-tutorial-exerciseCs757 ns2-tutorial-exercise
Cs757 ns2-tutorial-exercise
 
Linux Shell Scripts and Shell Commands✌️
Linux Shell Scripts and Shell Commands✌️Linux Shell Scripts and Shell Commands✌️
Linux Shell Scripts and Shell Commands✌️
 
KubeCon EU 2016: Custom Volume Plugins
KubeCon EU 2016: Custom Volume PluginsKubeCon EU 2016: Custom Volume Plugins
KubeCon EU 2016: Custom Volume Plugins
 
Artimon - Apache Flume (incubating) NYC Meetup 20111108
Artimon - Apache Flume (incubating) NYC Meetup 20111108Artimon - Apache Flume (incubating) NYC Meetup 20111108
Artimon - Apache Flume (incubating) NYC Meetup 20111108
 
Nosql hands on handout 04
Nosql hands on handout 04Nosql hands on handout 04
Nosql hands on handout 04
 
Bash Scripting Workshop
Bash Scripting WorkshopBash Scripting Workshop
Bash Scripting Workshop
 
"PostgreSQL and Python" Lightning Talk @EuroPython2014
"PostgreSQL and Python" Lightning Talk @EuroPython2014"PostgreSQL and Python" Lightning Talk @EuroPython2014
"PostgreSQL and Python" Lightning Talk @EuroPython2014
 
Bash Script Disk Space Utilization Report and EMail
Bash Script Disk Space Utilization Report and EMailBash Script Disk Space Utilization Report and EMail
Bash Script Disk Space Utilization Report and EMail
 
Dtalk shell
Dtalk shellDtalk shell
Dtalk shell
 
File Space Usage Information and EMail Report - Shell Script
File Space Usage Information and EMail Report - Shell ScriptFile Space Usage Information and EMail Report - Shell Script
File Space Usage Information and EMail Report - Shell Script
 
2015 555 kharchenko_ppt
2015 555 kharchenko_ppt2015 555 kharchenko_ppt
2015 555 kharchenko_ppt
 
Python postgre sql a wonderful wedding
Python postgre sql   a wonderful weddingPython postgre sql   a wonderful wedding
Python postgre sql a wonderful wedding
 
Coming Out Of Your Shell - A Comparison of *Nix Shells
Coming Out Of Your Shell - A Comparison of *Nix ShellsComing Out Of Your Shell - A Comparison of *Nix Shells
Coming Out Of Your Shell - A Comparison of *Nix Shells
 
dplyr
dplyrdplyr
dplyr
 
Neatly Hashing a Tree: FP tree-fold in Perl5 & Perl6
Neatly Hashing a Tree: FP tree-fold in Perl5 & Perl6Neatly Hashing a Tree: FP tree-fold in Perl5 & Perl6
Neatly Hashing a Tree: FP tree-fold in Perl5 & Perl6
 
How-to Integração Postfi
How-to Integração PostfiHow-to Integração Postfi
How-to Integração Postfi
 
Process monitoring in UNIX shell scripting
Process monitoring in UNIX shell scriptingProcess monitoring in UNIX shell scripting
Process monitoring in UNIX shell scripting
 

Similaire à Workshop on command line tools - day 2

Spark + Clojure for Topic Discovery - Zalando Tech Clojure/Conj Talk
Spark + Clojure for Topic Discovery - Zalando Tech Clojure/Conj TalkSpark + Clojure for Topic Discovery - Zalando Tech Clojure/Conj Talk
Spark + Clojure for Topic Discovery - Zalando Tech Clojure/Conj Talk
Zalando Technology
 
The Art of Command Line (2021)
The Art of Command Line (2021)The Art of Command Line (2021)
The Art of Command Line (2021)
Kenta Yamamoto
 
Shell Script Disk Usage Report and E-Mail Current Threshold Status
Shell Script  Disk Usage Report and E-Mail Current Threshold StatusShell Script  Disk Usage Report and E-Mail Current Threshold Status
Shell Script Disk Usage Report and E-Mail Current Threshold Status
VCP Muthukrishna
 
Node Boot Camp
Node Boot CampNode Boot Camp
Node Boot Camp
Troy Miles
 
Refactoring to Macros with Clojure
Refactoring to Macros with ClojureRefactoring to Macros with Clojure
Refactoring to Macros with Clojure
Dmitry Buzdin
 
R (Shiny Package) - Server Side Code for Decision Support System
R (Shiny Package) - Server Side Code for Decision Support SystemR (Shiny Package) - Server Side Code for Decision Support System
R (Shiny Package) - Server Side Code for Decision Support System
Maithreya Chakravarthula
 
os lab assignment.pdf
os lab assignment.pdfos lab assignment.pdf
os lab assignment.pdf
SadamunAhemedBiplob2
 
Introduction to Unix - POS420Unix  Lab Exercise Week 3 BTo.docx
Introduction to Unix - POS420Unix  Lab Exercise Week 3 BTo.docxIntroduction to Unix - POS420Unix  Lab Exercise Week 3 BTo.docx
Introduction to Unix - POS420Unix  Lab Exercise Week 3 BTo.docx
mariuse18nolet
 
From mysql to MongoDB(MongoDB2011北京交流会)
From mysql to MongoDB(MongoDB2011北京交流会)From mysql to MongoDB(MongoDB2011北京交流会)
From mysql to MongoDB(MongoDB2011北京交流会)
Night Sailer
 
Paexec — distributes tasks over network or CPUs
Paexec — distributes tasks over network or CPUsPaexec — distributes tasks over network or CPUs
Paexec — distributes tasks over network or CPUs
Minsk Linux User Group
 
Time Series Analysis and Mining with R
Time Series Analysis and Mining with RTime Series Analysis and Mining with R
Time Series Analysis and Mining with R
Yanchang Zhao
 
Do snow.rwn
Do snow.rwnDo snow.rwn
Do snow.rwn
ARUN DN
 
Kafka Streams: Revisiting the decisions of the past (How I could have made it...
Kafka Streams: Revisiting the decisions of the past (How I could have made it...Kafka Streams: Revisiting the decisions of the past (How I could have made it...
Kafka Streams: Revisiting the decisions of the past (How I could have made it...
confluent
 
Ns network simulator
Ns network simulatorNs network simulator
Ns network simulator
Dr. Edwin Hernandez
 
CLI Wizardry - A Friendly Intro To sed/awk/grep
CLI Wizardry - A Friendly Intro To sed/awk/grepCLI Wizardry - A Friendly Intro To sed/awk/grep
CLI Wizardry - A Friendly Intro To sed/awk/grep
All Things Open
 
R programming
R programmingR programming
R programming
Pramodkumar Jha
 
Beyond PHP - It's not (just) about the code
Beyond PHP - It's not (just) about the codeBeyond PHP - It's not (just) about the code
Beyond PHP - It's not (just) about the code
Wim Godden
 
Code is not text! How graph technologies can help us to understand our code b...
Code is not text! How graph technologies can help us to understand our code b...Code is not text! How graph technologies can help us to understand our code b...
Code is not text! How graph technologies can help us to understand our code b...
Andreas Dewes
 
R basics
R basicsR basics
R basics
Sagun Baijal
 
Designing Opeation Oriented Web Applications / YAPC::Asia Tokyo 2011
Designing Opeation Oriented Web Applications / YAPC::Asia Tokyo 2011Designing Opeation Oriented Web Applications / YAPC::Asia Tokyo 2011
Designing Opeation Oriented Web Applications / YAPC::Asia Tokyo 2011
Masahiro Nagano
 

Similaire à Workshop on command line tools - day 2 (20)

Spark + Clojure for Topic Discovery - Zalando Tech Clojure/Conj Talk
Spark + Clojure for Topic Discovery - Zalando Tech Clojure/Conj TalkSpark + Clojure for Topic Discovery - Zalando Tech Clojure/Conj Talk
Spark + Clojure for Topic Discovery - Zalando Tech Clojure/Conj Talk
 
The Art of Command Line (2021)
The Art of Command Line (2021)The Art of Command Line (2021)
The Art of Command Line (2021)
 
Shell Script Disk Usage Report and E-Mail Current Threshold Status
Shell Script  Disk Usage Report and E-Mail Current Threshold StatusShell Script  Disk Usage Report and E-Mail Current Threshold Status
Shell Script Disk Usage Report and E-Mail Current Threshold Status
 
Node Boot Camp
Node Boot CampNode Boot Camp
Node Boot Camp
 
Refactoring to Macros with Clojure
Refactoring to Macros with ClojureRefactoring to Macros with Clojure
Refactoring to Macros with Clojure
 
R (Shiny Package) - Server Side Code for Decision Support System
R (Shiny Package) - Server Side Code for Decision Support SystemR (Shiny Package) - Server Side Code for Decision Support System
R (Shiny Package) - Server Side Code for Decision Support System
 
os lab assignment.pdf
os lab assignment.pdfos lab assignment.pdf
os lab assignment.pdf
 
Introduction to Unix - POS420Unix  Lab Exercise Week 3 BTo.docx
Introduction to Unix - POS420Unix  Lab Exercise Week 3 BTo.docxIntroduction to Unix - POS420Unix  Lab Exercise Week 3 BTo.docx
Introduction to Unix - POS420Unix  Lab Exercise Week 3 BTo.docx
 
From mysql to MongoDB(MongoDB2011北京交流会)
From mysql to MongoDB(MongoDB2011北京交流会)From mysql to MongoDB(MongoDB2011北京交流会)
From mysql to MongoDB(MongoDB2011北京交流会)
 
Paexec — distributes tasks over network or CPUs
Paexec — distributes tasks over network or CPUsPaexec — distributes tasks over network or CPUs
Paexec — distributes tasks over network or CPUs
 
Time Series Analysis and Mining with R
Time Series Analysis and Mining with RTime Series Analysis and Mining with R
Time Series Analysis and Mining with R
 
Do snow.rwn
Do snow.rwnDo snow.rwn
Do snow.rwn
 
Kafka Streams: Revisiting the decisions of the past (How I could have made it...
Kafka Streams: Revisiting the decisions of the past (How I could have made it...Kafka Streams: Revisiting the decisions of the past (How I could have made it...
Kafka Streams: Revisiting the decisions of the past (How I could have made it...
 
Ns network simulator
Ns network simulatorNs network simulator
Ns network simulator
 
CLI Wizardry - A Friendly Intro To sed/awk/grep
CLI Wizardry - A Friendly Intro To sed/awk/grepCLI Wizardry - A Friendly Intro To sed/awk/grep
CLI Wizardry - A Friendly Intro To sed/awk/grep
 
R programming
R programmingR programming
R programming
 
Beyond PHP - It's not (just) about the code
Beyond PHP - It's not (just) about the codeBeyond PHP - It's not (just) about the code
Beyond PHP - It's not (just) about the code
 
Code is not text! How graph technologies can help us to understand our code b...
Code is not text! How graph technologies can help us to understand our code b...Code is not text! How graph technologies can help us to understand our code b...
Code is not text! How graph technologies can help us to understand our code b...
 
R basics
R basicsR basics
R basics
 
Designing Opeation Oriented Web Applications / YAPC::Asia Tokyo 2011
Designing Opeation Oriented Web Applications / YAPC::Asia Tokyo 2011Designing Opeation Oriented Web Applications / YAPC::Asia Tokyo 2011
Designing Opeation Oriented Web Applications / YAPC::Asia Tokyo 2011
 

Plus de Leandro Lima

The use of BEDTools to analyze CNV regions
The use of BEDTools to analyze CNV regionsThe use of BEDTools to analyze CNV regions
The use of BEDTools to analyze CNV regions
Leandro Lima
 
Estudos genéticos em doenças Mendelianas e complexas
Estudos genéticos em doenças Mendelianas e complexasEstudos genéticos em doenças Mendelianas e complexas
Estudos genéticos em doenças Mendelianas e complexas
Leandro Lima
 
Uso do Cytoscape para Visualização e Análise de Redes
Uso do Cytoscape para Visualização e Análise de RedesUso do Cytoscape para Visualização e Análise de Redes
Uso do Cytoscape para Visualização e Análise de Redes
Leandro Lima
 
Brokers e Bridges (genes em rede de interação proteína-proteína)
Brokers e Bridges (genes em rede de interação proteína-proteína)Brokers e Bridges (genes em rede de interação proteína-proteína)
Brokers e Bridges (genes em rede de interação proteína-proteína)
Leandro Lima
 
Avanços e perspectivas em Bioinformática
Avanços e perspectivas em BioinformáticaAvanços e perspectivas em Bioinformática
Avanços e perspectivas em Bioinformática
Leandro Lima
 
Int. à Bioinformática (FMU - 08/05/2012)
Int. à Bioinformática (FMU - 08/05/2012)Int. à Bioinformática (FMU - 08/05/2012)
Int. à Bioinformática (FMU - 08/05/2012)
Leandro Lima
 
Redes Complexas aplicadas a Redes Sociais (09/05/2012 - FMU)
Redes Complexas aplicadas a Redes Sociais (09/05/2012 - FMU)Redes Complexas aplicadas a Redes Sociais (09/05/2012 - FMU)
Redes Complexas aplicadas a Redes Sociais (09/05/2012 - FMU)
Leandro Lima
 

Plus de Leandro Lima (7)

The use of BEDTools to analyze CNV regions
The use of BEDTools to analyze CNV regionsThe use of BEDTools to analyze CNV regions
The use of BEDTools to analyze CNV regions
 
Estudos genéticos em doenças Mendelianas e complexas
Estudos genéticos em doenças Mendelianas e complexasEstudos genéticos em doenças Mendelianas e complexas
Estudos genéticos em doenças Mendelianas e complexas
 
Uso do Cytoscape para Visualização e Análise de Redes
Uso do Cytoscape para Visualização e Análise de RedesUso do Cytoscape para Visualização e Análise de Redes
Uso do Cytoscape para Visualização e Análise de Redes
 
Brokers e Bridges (genes em rede de interação proteína-proteína)
Brokers e Bridges (genes em rede de interação proteína-proteína)Brokers e Bridges (genes em rede de interação proteína-proteína)
Brokers e Bridges (genes em rede de interação proteína-proteína)
 
Avanços e perspectivas em Bioinformática
Avanços e perspectivas em BioinformáticaAvanços e perspectivas em Bioinformática
Avanços e perspectivas em Bioinformática
 
Int. à Bioinformática (FMU - 08/05/2012)
Int. à Bioinformática (FMU - 08/05/2012)Int. à Bioinformática (FMU - 08/05/2012)
Int. à Bioinformática (FMU - 08/05/2012)
 
Redes Complexas aplicadas a Redes Sociais (09/05/2012 - FMU)
Redes Complexas aplicadas a Redes Sociais (09/05/2012 - FMU)Redes Complexas aplicadas a Redes Sociais (09/05/2012 - FMU)
Redes Complexas aplicadas a Redes Sociais (09/05/2012 - FMU)
 

Dernier

Voxxed Days Trieste 2024 - Unleashing the Power of Vector Search and Semantic...
Voxxed Days Trieste 2024 - Unleashing the Power of Vector Search and Semantic...Voxxed Days Trieste 2024 - Unleashing the Power of Vector Search and Semantic...
Voxxed Days Trieste 2024 - Unleashing the Power of Vector Search and Semantic...
Luigi Fugaro
 
What is Continuous Testing in DevOps - A Definitive Guide.pdf
What is Continuous Testing in DevOps - A Definitive Guide.pdfWhat is Continuous Testing in DevOps - A Definitive Guide.pdf
What is Continuous Testing in DevOps - A Definitive Guide.pdf
kalichargn70th171
 
A Comprehensive Guide on Implementing Real-World Mobile Testing Strategies fo...
A Comprehensive Guide on Implementing Real-World Mobile Testing Strategies fo...A Comprehensive Guide on Implementing Real-World Mobile Testing Strategies fo...
A Comprehensive Guide on Implementing Real-World Mobile Testing Strategies fo...
kalichargn70th171
 
The Rising Future of CPaaS in the Middle East 2024
The Rising Future of CPaaS in the Middle East 2024The Rising Future of CPaaS in the Middle East 2024
The Rising Future of CPaaS in the Middle East 2024
Yara Milbes
 
如何办理(hull学位证书)英国赫尔大学毕业证硕士文凭原版一模一样
如何办理(hull学位证书)英国赫尔大学毕业证硕士文凭原版一模一样如何办理(hull学位证书)英国赫尔大学毕业证硕士文凭原版一模一样
如何办理(hull学位证书)英国赫尔大学毕业证硕士文凭原版一模一样
gapen1
 
How Can Hiring A Mobile App Development Company Help Your Business Grow?
How Can Hiring A Mobile App Development Company Help Your Business Grow?How Can Hiring A Mobile App Development Company Help Your Business Grow?
How Can Hiring A Mobile App Development Company Help Your Business Grow?
ToXSL Technologies
 
Optimizing Your E-commerce with WooCommerce.pptx
Optimizing Your E-commerce with WooCommerce.pptxOptimizing Your E-commerce with WooCommerce.pptx
Optimizing Your E-commerce with WooCommerce.pptx
WebConnect Pvt Ltd
 
42 Ways to Generate Real Estate Leads - Sellxpert
42 Ways to Generate Real Estate Leads - Sellxpert42 Ways to Generate Real Estate Leads - Sellxpert
42 Ways to Generate Real Estate Leads - Sellxpert
vaishalijagtap12
 
Building API data products on top of your real-time data infrastructure
Building API data products on top of your real-time data infrastructureBuilding API data products on top of your real-time data infrastructure
Building API data products on top of your real-time data infrastructure
confluent
 
Unlock the Secrets to Effortless Video Creation with Invideo: Your Ultimate G...
Unlock the Secrets to Effortless Video Creation with Invideo: Your Ultimate G...Unlock the Secrets to Effortless Video Creation with Invideo: Your Ultimate G...
Unlock the Secrets to Effortless Video Creation with Invideo: Your Ultimate G...
The Third Creative Media
 
Why Apache Kafka Clusters Are Like Galaxies (And Other Cosmic Kafka Quandarie...
Why Apache Kafka Clusters Are Like Galaxies (And Other Cosmic Kafka Quandarie...Why Apache Kafka Clusters Are Like Galaxies (And Other Cosmic Kafka Quandarie...
Why Apache Kafka Clusters Are Like Galaxies (And Other Cosmic Kafka Quandarie...
Paul Brebner
 
Upturn India Technologies - Web development company in Nashik
Upturn India Technologies - Web development company in NashikUpturn India Technologies - Web development company in Nashik
Upturn India Technologies - Web development company in Nashik
Upturn India Technologies
 
The Role of DevOps in Digital Transformation.pdf
The Role of DevOps in Digital Transformation.pdfThe Role of DevOps in Digital Transformation.pdf
The Role of DevOps in Digital Transformation.pdf
mohitd6
 
Stork Product Overview: An AI-Powered Autonomous Delivery Fleet
Stork Product Overview: An AI-Powered Autonomous Delivery FleetStork Product Overview: An AI-Powered Autonomous Delivery Fleet
Stork Product Overview: An AI-Powered Autonomous Delivery Fleet
Vince Scalabrino
 
TMU毕业证书精仿办理
TMU毕业证书精仿办理TMU毕业证书精仿办理
TMU毕业证书精仿办理
aeeva
 
14 th Edition of International conference on computer vision
14 th Edition of International conference on computer vision14 th Edition of International conference on computer vision
14 th Edition of International conference on computer vision
ShulagnaSarkar2
 
WMF 2024 - Unlocking the Future of Data Powering Next-Gen AI with Vector Data...
WMF 2024 - Unlocking the Future of Data Powering Next-Gen AI with Vector Data...WMF 2024 - Unlocking the Future of Data Powering Next-Gen AI with Vector Data...
WMF 2024 - Unlocking the Future of Data Powering Next-Gen AI with Vector Data...
Luigi Fugaro
 
Going AOT: Everything you need to know about GraalVM for Java applications
Going AOT: Everything you need to know about GraalVM for Java applicationsGoing AOT: Everything you need to know about GraalVM for Java applications
Going AOT: Everything you need to know about GraalVM for Java applications
Alina Yurenko
 
Assure Contact Center Experiences for Your Customers With ThousandEyes
Assure Contact Center Experiences for Your Customers With ThousandEyesAssure Contact Center Experiences for Your Customers With ThousandEyes
Assure Contact Center Experiences for Your Customers With ThousandEyes
ThousandEyes
 
Orca: Nocode Graphical Editor for Container Orchestration
Orca: Nocode Graphical Editor for Container OrchestrationOrca: Nocode Graphical Editor for Container Orchestration
Orca: Nocode Graphical Editor for Container Orchestration
Pedro J. Molina
 

Dernier (20)

Voxxed Days Trieste 2024 - Unleashing the Power of Vector Search and Semantic...
Voxxed Days Trieste 2024 - Unleashing the Power of Vector Search and Semantic...Voxxed Days Trieste 2024 - Unleashing the Power of Vector Search and Semantic...
Voxxed Days Trieste 2024 - Unleashing the Power of Vector Search and Semantic...
 
What is Continuous Testing in DevOps - A Definitive Guide.pdf
What is Continuous Testing in DevOps - A Definitive Guide.pdfWhat is Continuous Testing in DevOps - A Definitive Guide.pdf
What is Continuous Testing in DevOps - A Definitive Guide.pdf
 
A Comprehensive Guide on Implementing Real-World Mobile Testing Strategies fo...
A Comprehensive Guide on Implementing Real-World Mobile Testing Strategies fo...A Comprehensive Guide on Implementing Real-World Mobile Testing Strategies fo...
A Comprehensive Guide on Implementing Real-World Mobile Testing Strategies fo...
 
The Rising Future of CPaaS in the Middle East 2024
The Rising Future of CPaaS in the Middle East 2024The Rising Future of CPaaS in the Middle East 2024
The Rising Future of CPaaS in the Middle East 2024
 
如何办理(hull学位证书)英国赫尔大学毕业证硕士文凭原版一模一样
如何办理(hull学位证书)英国赫尔大学毕业证硕士文凭原版一模一样如何办理(hull学位证书)英国赫尔大学毕业证硕士文凭原版一模一样
如何办理(hull学位证书)英国赫尔大学毕业证硕士文凭原版一模一样
 
How Can Hiring A Mobile App Development Company Help Your Business Grow?
How Can Hiring A Mobile App Development Company Help Your Business Grow?How Can Hiring A Mobile App Development Company Help Your Business Grow?
How Can Hiring A Mobile App Development Company Help Your Business Grow?
 
Optimizing Your E-commerce with WooCommerce.pptx
Optimizing Your E-commerce with WooCommerce.pptxOptimizing Your E-commerce with WooCommerce.pptx
Optimizing Your E-commerce with WooCommerce.pptx
 
42 Ways to Generate Real Estate Leads - Sellxpert
42 Ways to Generate Real Estate Leads - Sellxpert42 Ways to Generate Real Estate Leads - Sellxpert
42 Ways to Generate Real Estate Leads - Sellxpert
 
Building API data products on top of your real-time data infrastructure
Building API data products on top of your real-time data infrastructureBuilding API data products on top of your real-time data infrastructure
Building API data products on top of your real-time data infrastructure
 
Unlock the Secrets to Effortless Video Creation with Invideo: Your Ultimate G...
Unlock the Secrets to Effortless Video Creation with Invideo: Your Ultimate G...Unlock the Secrets to Effortless Video Creation with Invideo: Your Ultimate G...
Unlock the Secrets to Effortless Video Creation with Invideo: Your Ultimate G...
 
Why Apache Kafka Clusters Are Like Galaxies (And Other Cosmic Kafka Quandarie...
Why Apache Kafka Clusters Are Like Galaxies (And Other Cosmic Kafka Quandarie...Why Apache Kafka Clusters Are Like Galaxies (And Other Cosmic Kafka Quandarie...
Why Apache Kafka Clusters Are Like Galaxies (And Other Cosmic Kafka Quandarie...
 
Upturn India Technologies - Web development company in Nashik
Upturn India Technologies - Web development company in NashikUpturn India Technologies - Web development company in Nashik
Upturn India Technologies - Web development company in Nashik
 
The Role of DevOps in Digital Transformation.pdf
The Role of DevOps in Digital Transformation.pdfThe Role of DevOps in Digital Transformation.pdf
The Role of DevOps in Digital Transformation.pdf
 
Stork Product Overview: An AI-Powered Autonomous Delivery Fleet
Stork Product Overview: An AI-Powered Autonomous Delivery FleetStork Product Overview: An AI-Powered Autonomous Delivery Fleet
Stork Product Overview: An AI-Powered Autonomous Delivery Fleet
 
TMU毕业证书精仿办理
TMU毕业证书精仿办理TMU毕业证书精仿办理
TMU毕业证书精仿办理
 
14 th Edition of International conference on computer vision
14 th Edition of International conference on computer vision14 th Edition of International conference on computer vision
14 th Edition of International conference on computer vision
 
WMF 2024 - Unlocking the Future of Data Powering Next-Gen AI with Vector Data...
WMF 2024 - Unlocking the Future of Data Powering Next-Gen AI with Vector Data...WMF 2024 - Unlocking the Future of Data Powering Next-Gen AI with Vector Data...
WMF 2024 - Unlocking the Future of Data Powering Next-Gen AI with Vector Data...
 
Going AOT: Everything you need to know about GraalVM for Java applications
Going AOT: Everything you need to know about GraalVM for Java applicationsGoing AOT: Everything you need to know about GraalVM for Java applications
Going AOT: Everything you need to know about GraalVM for Java applications
 
Assure Contact Center Experiences for Your Customers With ThousandEyes
Assure Contact Center Experiences for Your Customers With ThousandEyesAssure Contact Center Experiences for Your Customers With ThousandEyes
Assure Contact Center Experiences for Your Customers With ThousandEyes
 
Orca: Nocode Graphical Editor for Container Orchestration
Orca: Nocode Graphical Editor for Container OrchestrationOrca: Nocode Graphical Editor for Container Orchestration
Orca: Nocode Graphical Editor for Container Orchestration
 

Workshop on command line tools - day 2

  • 1. I Workshop on command- line tools (day 2) Center for Applied Genomics Children's Hospital of Philadelphia February 12-13, 2015
  • 2. awk - a powerful way to check conditions and show specific columns Example: show only CNVs that use at most 3 targets (exons) tail -n +2 DATA.xcnv | awk '$8 <= 3'
  • 3. awk - different ways to do the same thing tail -n +2 DATA.xcnv | awk '$8 <= 3' # same effect 1 tail -n +2 DATA.xcnv | awk '$8 <= 3 {print}' # same effect 2 tail -n +2 DATA.xcnv | awk '{if ($8 <= 3) {print}}' # same effect 3 tail -n +2 DATA.xcnv | awk '{if ($8 <= 3) {print $0}}' # different effect tail -n +2 DATA.xcnv | awk '{if ($8 <= 3) {print $1}}'
  • 4. awk - more options on if statement # Applying XHMM "gold" thresholds (KB >= 1, # NUM_TARG >= 3, Q_SOME >= 65, Q_NON_DIPLOID >= 65) tail -n +2 DATA.xcnv | awk '$4 >= 1 && $8 >= 3 && $10 >= 65 && $11 >= 65' > DATA.gold.xcnv # Using only awk awk 'NR > 1 && $4 >= 1 && $8 >= 3 && $10 >= 65 && $11 >= 65' DATA.xcnv > DATA.gold2.xcnv
  • 5. diff - compare files line by line # Compare diff DATA.gold.xcnv DATA.gold2.xcnv # Tip: install tkdiff to use a # graphic version of diff
  • 6. Exercises 1. Using adhd.map, show 10 SNPs with rsID starting with 'rs' on chrom. 2, between positions 1Mb and 2Mb 2. Check which chromosome has more SNPs 3. Check which snp IDs are duplicated
  • 7. Suggestions # 1. grep '\brs' adhd.map | awk '$1 == 2 && int($4) >= 1000000 && int($4) <= 2000000' | less # 2. cut -f1 adhd.map | sort | uniq -c | sort -k1n | tail -1 # 3. cut -f2 adhd.map | sort | uniq -c | awk '$1 > 1'
  • 8. More awk - inserting external variables awk -v Mb=1000000 -v chrom=2 '$1 == chrom && int($4) >= Mb && int($4) <= 2*Mb' adhd.map | less # Printing specific columns awk -v Mb=1000000 -v chrom=2 '$1 == chrom && int($4) >= Mb && int($4) <= 2*Mb {print $1" "$2" "$4}' adhd.map | less
  • 9. Using awk to check number of variants in ped files # Options using only awk, but takes (much) more time awk 'NR == 1 {print (NF-6)/2}' adhd.ped awk 'NR < 2 {print (NF-6)/2}' adhd.ped # Slow, too # Better alternative head -n 1 adhd.ped | awk '{print (NF-6)/2}' # Now, the map file wc -l adhd.map
  • 10. time - time command execution time head -n 1 adhd.ped | awk '{print (NF-6)/2}' real 0m0.485s user 0m0.391s sys 0m0.064s time awk 'NR < 2 {print (NF-6)/2}' adhd.ped # Forget… just press Ctrl+C real 1m0.611s user 0m51.261s sys 0m0.826s
  • 11. top - display and update sorted information about processes / display Linux tasks top z : color k : kill process u : choose specific user c : show complete commands running 1 : show usage of individual CPUs q : quit
  • 12. screen - screen manager with terminal emulation (i) screen screen -S <session_name> Ctrl+a, then c: create window Ctrl+a, then n: go to next window Ctrl+a, then p: go to previous window Ctrl+a, then 0: go to window number 0 Ctrl+a, then d: detach (leave your session, but keep it running)
  • 13. screen - screen manager with terminal emulation (ii) Ctrl+a, then [ : activate copy mode (to scroll screen) q : quit copy mode exit : close current window screen -r : resume the only session detached screen -r <session_name> : resume specific session detached screen -rD <session_name> : reattach session
  • 14. split - split a file into pieces split -l <lines_of_each_piece> <input> <prefix> # Example split -l 100000 adhd.map map_ wc -l map_*
  • 15. in-line Perl/sed to find and replace (i) head DATA.gold.xcnv | cut -f3 | perl -pe 's/chr/CHR/g' head DATA.gold.xcnv | cut -f3 | perl -pe 's/chr//g' # Other possibilities head DATA.gold.xcnv | cut -f3 | perl -pe 's|chr||g' head DATA.gold.xcnv | cut -f3 | perl -pe 's!chr!!g' head DATA.gold.xcnv | cut -f3 | sed 's/chr//g' # Creating a BED file head DATA.gold.xcnv | cut -f3 | perl -pe 's/[:-]/\t/g'
  • 16. in-line Perl/sed to find and replace (ii) # "s" means substitute # "g" means global (replace all matches, not only first) # See the difference... head DATA.gold.xcnv | cut -f3 | sed 's/9/nine/g' head DATA.gold.xcnv | cut -f3 | sed 's/9/nine/' # Adding more replacements head DATA.gold.xcnv | cut -f3 | sed 's/1/one/g; s/2/two/g'
  • 17. copy from terminal to clipboard/ paste from clipboard to terminal # This is like Ctrl+V in your terminal pbpaste # This is like Ctrl+C from your terminal head DATA.xcnv | pbcopy # Then, Ctrl+V in other text editor # On Linux, you can install "xclip" http://sourceforge.net/projects/xclip/
  • 18. datamash - command-line calculations tail -n +2 DATA.xcnv | head | cut -f6,10,11 | datamash mean 1 sum 2 min 3 # mean of 1st column # sum of 2nd column # minimum of 3rd column http://www.gnu.org/software/datamash/
  • 19. touch - change file access and modification times ls -lh DATA.gold.xcnv touch DATA.gold.xcnv ls -lh DATA.gold.xcnv
  • 20. Introduction to "for" loop tail -n +2 DATA.xcnv | cut -f1 | sort | uniq | head > samples.txt for sample in `cat samples.txt`; do touch $sample.txt; done ls -lh Sample* for sample in `cat samples.txt`; do mv $sample.txt $sample.csv; done
  • 21. Variables (i) i=1 name=Leandro count=`wc -l adhd.map` echo $i echo $name echo $count
  • 23. System variables echo $HOME echo $USER echo $PWD # directory where bash looks for your programs echo $PATH
  • 24. Exercise 1. Create a program that shows input parameters/arguments 2. Create a program (say, "fields", or "colnames") that prints the column names of a <tab>-delimited file (example: DATA.xcnv) 3. Send this program to your PATH
  • 25. Running a bash script (i) cat > arguments.sh echo Your program is $0 echo Your first argument is $1 echo Your second argument is $2 echo You entered $# parameters. # Ctrl+C to exit "cat"
  • 26. Running a bash script (ii) bash arguments.sh bash arguments.sh A B C D E
  • 27. ls -lh arguments.sh -rw-r--r-- # First character b Block special file. c Character special file. d Directory. l Symbolic link. s Socket link. p FIFO. - Regular file. chmod - set permissions (i)
  • 28. Next characters user, group, others | read, write, execute ls -lh arguments.sh -rw-r--r-- # Everybody can read # Only user can write/modify chmod - set permissions (ii)
  • 29. # Add writing permission to group chmod g+w arguments.sh ls -lh arguments.sh # Remove writing permission from group chmod g-w arguments.sh ls -lh arguments.sh # Add execution permission to all chmod a+x arguments.sh ls -lh arguments.sh chmod - set permissions (iii)
  • 30. # Run the program directly (it is now executable) ./arguments.sh ./arguments.sh A B C D E # change the name mv arguments.sh arguments # Send to your PATH (showing on Mac) sudo cp arguments /usr/local/bin/ # Go to other directory # Type argu<Tab>, and "which arguments" Run your program again