Pitchfork.com Album Rating 'API'
February 14, 2012
A friend of mine told me a few weeks ago that he wanted to make a Chrome plugin to display Pitchfork album scores on Rdio album pages. This seemed like an interesting idea to me, and I wanted to take a stab at the JavaScript for it. The following is a rundown of my thought process while researching the problem and the end result.
Skip straight to a JSFiddle demo if words bore you.
Steps
My initial process went as follows:
- Use album and artist from Rdio URL to get URL of Pitchfork review page
- Scrape review page for score
Getting the Pitchfork Album Review URL
Upon first review of the Pitchfork site, it seemed like trying to generate URLs based on album name was out of the question. All Pitchfork album review pages contain an ID in the URL, such as http://pitchfork.com/reviews/albums/15551-bon-iver/. The next best thing I could think of was to try to generate a search URL, since the syntax was simple (http://pitchfork.com/search/?query=bon+iver), and then scrape that page. However, when investigating a little further, I saw that Pitchfork has an autocomplete ‘API’ which returns JSON.
Since it returned JSON, I was able to use a Yahoo Pipe to convert the JSON to JSONP. I wanted to keep this demo on the client-side as much as I could, so anything I could do to avoid having to set up a server-side proxy was a win. With the Yahoo Pipe returning JSONP, I had a list of URLs for review pages when entering a search term.
Getting the Album Score
Staying in the client-side spirit, I decided to use YQL and based my code off James Padolsey’s 2009 blog post on the subject. YQL made it relatively simple to take the review URL, parse the review score from the HTML with XPath, and return JSONP. I’m not an expert in these things, so I feel like my XPath query is pretty fragile. If you have any suggestions to make it better, I’d love to hear them. Fragile or not, my response data was JSON that contained the album’s score. Success!
Code
(function($, undefined) {
/**
 * Normalizes a piece of text so artist/album names can be compared.
 *
 * @param {*} str Text to normalize; null/undefined are treated as empty.
 * @returns {string} The text lower-cased with surrounding whitespace removed.
 */
function sanitize(str) {
    // A missing value normalizes to '' instead of throwing on .toLowerCase().
    if (str == null) {
        return '';
    }
    // Trim so stray whitespace around the text does not defeat the
    // exact-match comparison done on sanitized names.
    return String(str).toLowerCase().trim();
}
/**
 * Splits a result name of the form "Artist - Album" into its sanitized
 * parts.
 *
 * Only the first " - " is treated as the separator, so an album title that
 * itself contains " - " is kept whole. A name with no separator is treated
 * as an artist with an empty album.
 *
 * @param {string} result Result name, e.g. "Bon Iver - Bon Iver".
 * @returns {{artist: string, album: string}} Sanitized artist and album.
 */
function normalizeP4kResult(result) {
    var separator = ' - ',
        name = (result == null) ? '' : String(result),
        splitAt = name.indexOf(separator);
    if (splitAt === -1) {
        // No separator: never hand an undefined album to sanitize().
        return {
            artist: sanitize(name),
            album: ''
        };
    }
    return {
        artist: sanitize(name.slice(0, splitAt)),
        album: sanitize(name.slice(splitAt + separator.length))
    };
}
/**
 * Builds the result record for one review entry: its url plus the
 * artist/album parsed from its name.
 *
 * @param {{url: string, name: string}} obj Review entry.
 * @returns {{url: string, artist: string, album: string}} Combined record.
 */
function returnResult(obj) {
    var record = {
        url: obj.url
    };
    var parsedName = normalizeP4kResult(obj.name);
    // Copy the parsed artist/album onto the record and hand it back.
    return $.extend(record, parsedName);
}
/**
 * Success callback for the search request. Picks the best matching review
 * from the response, shows its artist/album in #result_artist and
 * #result_album, and then requests its score via getAlbumScore().
 *
 * @param {Object} data Response payload; the search sections are read from
 *     data.value.items[0].json.
 * @param {string} textStatus Status text passed by $.ajax (unused).
 * @param {Object} xhr Request object passed by $.ajax (unused).
 */
function getAlbumUrl(data, textStatus, xhr) {
    // The searched-for names are locals of searchPitchfork() and are not
    // visible here, so read them from the page again for the comparison.
    var album = sanitize($('#album').text()),
        artist = sanitize($('#artist').text()),
        firstItem = data && data.value && data.value.items && data.value.items[0],
        searchData = (firstItem && firstItem.json) || [],
        reviews = {},
        theResult = {},
        section,
        p4kResult,
        i,
        m;
    // Find the reviews object
    for (i = 0, m = searchData.length; i < m; i++) {
        section = searchData[i];
        if (section && typeof section.label === 'string' && section.label.toLowerCase() === "reviews") {
            reviews = section;
            break;
        }
    }
    if ($.isEmptyObject(reviews) || $.isEmptyObject(reviews.objects)) {
        console.error('The search returned no reviews');
        return;
    }
    // We have results; a non-list `objects` value is wrapped so it can be
    // handled like a list of one.
    reviews = (reviews.objects.length > 0) ? reviews.objects : [reviews.objects];
    // Look for an exact match; an empty search term matches anything.
    for (i = 0, m = reviews.length; i < m; i++) {
        p4kResult = normalizeP4kResult(reviews[i].name);
        if ((p4kResult.artist === artist || artist === '') && (p4kResult.album === album || album === '')) {
            theResult = returnResult(reviews[i]);
            break;
        }
    }
    if ($.isEmptyObject(theResult)) {
        // No exact match (or only one result): use the top result.
        theResult = returnResult(reviews[0]);
    }
    // Populate album, artist data from search query
    $('#result_artist').text(theResult.artist);
    $('#result_album').text(theResult.album);
    // Get the album score
    getAlbumScore("http://pitchfork.com" + theResult.url);
}
/**
 * Requests the score for a review page through a YQL query (as JSONP) and
 * writes it into #score.
 *
 * @param {string} url Absolute URL of the review page.
 */
function getAlbumScore(url) {
    // Create YQL query to get span containing score
    var query = encodeURIComponent('select content from html where url="' + url + '" and compat="html5" and xpath=\'//div[@id="main"]/ul/li/div[@class="info"]/span\''),
        // JSONP url for YQL query
        yqlurl = 'http://query.yahooapis.com/v1/public/yql?q=' + query + '&format=json&callback=?';
    $.ajax({
        url: yqlurl,
        type: 'GET',
        dataType: 'jsonp',
        success: function(data, textStatus, xhr) {
            // Guard the nested lookup: a response without results would
            // otherwise throw inside the callback.
            var results = data && data.query && data.query.results;
            if (!results || results.span == null) {
                console.error('No score found for ' + url);
                return;
            }
            $('#score').text(results.span);
        },
        error: function(xhr, textStatus, errorThrown) {
            console.error(xhr, textStatus, errorThrown);
        }
    });
}
/**
 * Reads the album and artist from the #album and #artist elements, builds
 * the autocomplete search URL from them, and requests it through a Yahoo
 * Pipe as JSONP. A successful response is handed to getAlbumUrl().
 */
function searchPitchfork() {
    // Search terms come from the page
    var albumName = sanitize($('#album').text());
    var artistName = sanitize($('#artist').text());
    // Autocomplete search URL on pitchfork
    var searchUrl = "http://pitchfork.com/search/ac/?query=" + albumName + ' - ' + artistName;
    // This yahoo pipe returns JSONP from pitchfork's JSON
    var pipeId = '332d9216d8910ba39e6c2577fd321a6a';
    var pipeUrl = [
        "http://pipes.yahoo.com/pipes/pipe.run?u=",
        encodeURIComponent(searchUrl),
        "&_id=",
        pipeId,
        "&_render=json&_callback=?"
    ].join('');

    function logFailure(xhr, textStatus, errorThrown) {
        console.error(xhr, textStatus, errorThrown);
    }

    $.ajax({
        url: pipeUrl,
        type: 'GET',
        dataType: 'jsonp',
        success: getAlbumUrl,
        error: logFailure
    });
}
// Kick off the search once the DOM is ready
$(document).ready(searchPitchfork);
})(jQuery);
Demo
The demo currently pulls the artist and album search terms from the #artist and #album spans. Click the + button to edit the fiddle and try some of your own search terms.