PhantomJS
Get Web Page Content
Use page.evaluate() method to evaluate the page.
To extract title tag use document.title.
The status argument passed to the page.open() callback is either 'success' or 'fail'.
/**
 * Fetch a web page and print its title.
 *
 * Opens `url` in a PhantomJS web page, logs the load status, and on success
 * reads document.title from inside the page via page.evaluate().
 * Exits with code 0 when the page loaded and 1 when it did not, so calling
 * scripts can detect a failed load.
 */
var page = require('webpage').create(),
    url = 'http://www.mikosoft.info';

page.open(url, function (status) {
    var exitCode = 0;

    console.log(status);

    if (status === 'success') {
        // The callback runs inside the loaded page, so `document` here is
        // the page's DOM; only its return value comes back to this script.
        var title = page.evaluate(function () {
            return document.title;
        });
        console.log('Page title is: ' + title);
    } else {
        // `status` is only 'success' or 'fail' and carries no HTTP status
        // code, so report a generic load failure rather than claiming a 404.
        console.log('Failed to load: ' + url);
        exitCode = 1;
    }

    phantom.exit(exitCode);
});
Notice that we read the title with plain JS DOM access: document.title.
If we want to use jQuery selectors, load jQuery into the page first with one of:
- page.includeJs() to load an external file, e.g. http://ajax.googleapis.com/ajax/libs/jquery/1.10.2/jquery.min.js
- page.injectJs() to load a local file, e.g. ./assets/js/jquery/1.11.1/jquery.min.js
See examples:
Get title tag - JS DOM: 01get_titletag_dom.js
Get title tag - jQuery loaded from Google CDN by includeJs: 02get_titletag_jquery_ggl.js
Get head content - jQuery loaded from local file by injectJs: 02get_titletag_jquery_loc.js
To grab the whole web page HTML use: document.documentElement.outerHTML
/**
 * Fetch a web page and print its full HTML.
 *
 * Opens `url` in a PhantomJS web page, logs the load status, and on success
 * reads document.documentElement.outerHTML from inside the page via
 * page.evaluate(). Exits with code 0 when the page loaded and 1 when it did
 * not, so calling scripts can detect a failed load.
 */
var page = require('webpage').create(),
    url = 'http://www.mikosoft.info';

page.open(url, function (status) {
    var exitCode = 0;

    console.log(status);

    if (status === 'success') {
        // The callback runs inside the loaded page; the serialized markup
        // of the root element is returned to this script as a string.
        var doc = page.evaluate(function () {
            return document.documentElement.outerHTML;
        });
        console.log("URL code is: \n\n" + doc);
    } else {
        // `status` is only 'success' or 'fail' and carries no HTTP status
        // code, so report a generic load failure rather than claiming a 404.
        console.log('Failed to load: ' + url);
        exitCode = 1;
    }

    phantom.exit(exitCode);
});