6. Syntax
6
• Package
import time
import time as t
• Format
print('hello %s' % 'iii')
print('hello %s %s' % ('iii', 'iii'))
• Package
const time = require('time')
• Format
let name = 'iii';
console.log(`hello ${name}`);
7. Syntax
7
• If … else …
if 1 == 0:
…
else:
…
• For Loop
for i in [0, 1, 2, 3]:
…
• If … else …
if (1 == 0) {
...
} else {
…
}
• For Loop
for (let i = 0; i < 4; i++) {
…
}
8. Syntax
8
• While Loop
while True:
• List
[0, 1, 2, 3]
• Dictionary
{'a': 0, 'b': 1, 'c': 2}
• Tuple
(0, 1, 2)
• While Loop
while (true) { ... }
• Array
[0, 1, 2, 3]
• Object
{a: 0, b: 1, c: 2}
10. Syntax
10
• Search
– String
if substr in str:
– List
if item in list:
– Dictionary
if key in dict:
• Search
– String
if (str.includes(substr))
– Array
if (array.includes(item))
– Object
if (key in obj)
17. JSON
17
import json
# JSON String to List or Dictionary
json.loads(<String>)
# List or Dictionary to JSON String
json.dumps(<List>)
json.dumps(<Dict>)
// JSON String to Array or Object
JSON.parse(<String>)
// Array or Object to JSON String
JSON.stringify(<Array>)
JSON.stringify(<Object>)
18. XML
18
pip install lxml
import lxml.etree as ET
# Load XML from File
tree = ET.parse(<FilePath>)
root = tree.getroot()
# Load XML from String
root = ET.fromstring(<String>)
npm install -g xpath
npm install -g xmldom
const xpath = require('xpath');
const xmldom = require('xmldom');
// Load XML from String
let parser = new xmldom.DOMParser();
let doc = parser
.parseFromString(<String>);
19. XML
19
# One Level Search
for node in root:
# Recursive Search
nodes = root.findall(<XPath>)
for node in nodes:
// Search
let nodes = xpath.select(<XPath>, doc);
nodes.forEach(function(node) {
console.log(node.localName);
for (var key in node.attributes) {
let attr = node.attributes[key];
if (attr.constructor.name != 'Attr') {
continue;
}
console.log(attr.name, attr.value);
}
});
20. URL
20
import urllib.parse as UP
# Parse / Unparse
parseResult = UP.urlparse(<Url>)
url = UP.urlunparse(<ParseResult>)
npm install -g url-parse
const urlparse = require('url-parse');
// Parse / Unparse
let parseResult = new urlparse(<Url>);
let url = parseResult.toString();
22. Regex
22
• Start
^
• End
$
• Range
[<Start>-<End>]
• Number
\d
• Character
\w
• Invisible Character
\s
123ABC /^1/
123ABC /C$/
123ABC /^[0-2]/
123ABC /^\d/
123ABC /\w$/
Tab, Space, Newline, …
23. Regex: Repeat
23
• Count
{N}
• Count Range
{N1,N2}
• One or More
+
• Zero or More
*
• Zero or One
?
123ABC /^\d{3}/
123ABC /^\d{1,3}/
123ABC 1ABCDE /^\d+/
123ABC ABCDEF /^\d*/
1ABCDE ABCDEF /^\d?/
29. Regex
29
import re
# Find First Match
match = re.search(<Pattern>, <String>)
# Find All Matches
matches = re.findall(<Pattern>, <String>)
# Get Matched Groups
match.group(<Index>)
match.group(<Name>)
const re = /<Pattern>/;
let match = re.exec(<String>);
// Get Matched Groups
match[<Index>]
match.groups[<Name>]
33. Chrome Developer Tools
33
• Elements
See Elements In DOM
Id, Class, Attribute, ...
• Network
See Requests, Responses
Urls, Methods, Headers, Cookies, Bodies, ...
43. HTTP Request with Headers
43
resp = requests.get(
<Url>,
headers=<Dict>
)
request({
method: 'GET',
uri: <Url>,
headers: <Object>
}, function (err, resp, body) {
// Do something
});
44. HTTP Request with CookieJar
44
jar = requests.cookies.RequestsCookieJar()
jar.set(<CookieName>, <CookieValue>)
resp = requests.get(
<Url>,
cookies=jar
)
let req = request.defaults({
jar: true
});
let jar = request.jar();
let cookie = request.cookie(<Cookie>);
jar.setCookie(cookie, <Url>);
req({
url: <Url>,
jar: jar
}, function (err, resp, body) {
// Do something
})
45. HTTP Request with Cookies
45
request({
method: 'GET',
uri: <Url>,
headers: {
'Cookie': <String>
}
}, function (err, resp, body) {
// Do something
});
resp = requests.get(
<Url>,
headers={
'Cookie': <String>
}
)
46. HTTP Request with Payload
46
resp = requests.post(
<Url>,
data=<Body>
)
request({
method: 'POST',
uri: <Url>,
body: <Body>
}, function (err, resp, body) {
// Do something
});
47. HTTP Response
47
• Status Code
resp.status_code
• Headers
resp.headers
• Cookies
resp.cookies
• Status Code
resp.statusCode
• Headers
resp.headers
• Cookies
resp.headers['set-cookie']
48. HTTP Response Content
48
• Binary Content
resp.content
• Text Content
resp.text
• JSON
resp.json()
• Third parameter in callback
function (err, resp, body) {
// Do something
}
49. DOM Parsing
49
pip install pyquery
import pyquery
# Load From String
d = pyquery.PyQuery(<HTML>)
# Load From Url
d = pyquery.PyQuery(url=<Url>)
npm install -g jsdom
npm install -g jquery
const jsdom = require("jsdom");
const jquery = require("jquery");
let inst = new jsdom.JSDOM(<HTML>);
let $ = jquery(inst.window);
50. DOM Parsing
50
# Find by CSS Selector
p = d(<Expression>)
# Get HTML From Element
p.html()
# Get Inner Text From Element
p.text()
# Get Value From Element’s Attribute
p.attr[<Name>]
// Find by CSS Selector
p = $(<Expression>)
// Get HTML From Element
p.html()
// Get Inner Text From Element
p.text()
// Get Value From Element’s Attribute
p.attr(<Name>)
51. Python: Beautiful Soup 4
51
pip install bs4
import bs4
# Load From String
d = bs4.BeautifulSoup(<HTML>, 'html.parser')
52. Python: Beautiful Soup 4
52
# Find by Element
p = d.find_all(<Tag>, <attr-name>=<attr-val>, ...)
p = d.find_all(<Regex>, <attr-name>=<attr-val>, ...)
p = d.find_all(<Array>, <attr-name>=<attr-val>, ...)
p = d.find(<Tag>, <attr-name>=<attr-val>, ...)
p = d.find(<Regex>, <attr-name>=<attr-val>, ...)
p = d.find(<Array>, <attr-name>=<attr-val>, ...)
# Find by CSS Selector
p = d.select(<Expression>)
p = d.select_one(<Expression>)
53. Python: Beautiful Soup 4
53
# Extract Text From Element
p.get_text()
# Get Value From Element’s Attribute
p.get(<AttrName>)
68. Selenium: Waits
68
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
element = WebDriverWait(chrome, <timeout>).until(
EC.<PredefinedCondition>((By.ID, <ID>))
)
element = WebDriverWait(chrome, <timeout>).until(
EC.<PredefinedCondition>((By.CSS_SELECTOR, <CSS Selector>))
)