Pitchfork.com Album Rating 'API'

A friend of mine told me a few weeks ago that he wanted to make a Chrome plugin to display Pitchfork album scores on Rdio album pages. This seemed like an interesting idea to me, and I wanted to take a stab at the JavaScript for it. The following is a rundown of my thought process while researching the problem and the end result.

Skip straight to a JSFiddle demo if words bore you.

Steps

My initial process went as follows:

  1. Use album and artist from Rdio URL to get URL of Pitchfork review page
  2. Scrape review page for score

Getting the Pitchfork Album Review URL

Upon first review of the Pitchfork site, it seemed like trying to generate URLs based on album name was out of the question. All Pitchfork album review pages contain an ID in the URL, such as http://pitchfork.com/reviews/albums/15551-bon-iver/. The next best thing I could think of was to try and generate a search URL since the syntax was simple (http://pitchfork.com/search/?query=bon+iver) and then scrape that page. However, when investigating a little further I saw that Pitchfork has an autocomplete ‘API’ which returns JSON.

Since it returned JSON, I was able to use a Yahoo Pipe to convert the JSON to JSONP. I wanted to keep this demo on the client side as much as I could, so anything I could do to avoid setting up a server-side proxy was a win. With the Yahoo Pipe returning JSONP, I had a list of URLs for review pages when entering a search term.

Getting the Album Score

Staying in the client-side spirit, I decided to use YQL and based my code off James Padolsey’s 2009 blog post on the subject. YQL made it relatively simple to take the review URL, parse the review score from the HTML with XPath, and return JSONP. I’m not an expert in these things, so I feel like my XPath query is pretty fragile. If you have any suggestions to make it better, I’d love to hear them. Fragile or not, my response data was JSON that contained the album’s score. Success!

Code

(function($, undefined) {
/**
 * Normalizes a name so it can be compared case-insensitively and used as a
 * search term.
 *
 * @param {string} str Raw artist or album text.
 * @returns {string} The text lower-cased with surrounding whitespace removed,
 *     or an empty string when `str` is null or undefined.
 */
function sanitize(str) {
    // A missing value (e.g. a result name with no album part) would otherwise
    // throw on .toLowerCase().
    if (str === null || str === undefined) {
        return '';
    }
    // Text read from the DOM can carry stray whitespace that would defeat
    // the exact-match comparison.
    return String(str).toLowerCase().trim();
}
/**
 * Splits a search-result name into sanitized artist and album parts.
 *
 * The name is split on ' - ': the text before the first separator is treated
 * as the artist and everything after it as the album.
 *
 * @param {string} result Result name, e.g. "Artist - Album".
 * @returns {{artist: string, album: string}} Sanitized parts; `album` is an
 *     empty string when the name contains no separator.
 */
function normalizeP4kResult(result) {
    var separator = ' - ',
        name = (result === null || result === undefined) ? '' : String(result),
        parts = name.split(separator),
        artist = parts[0],
        // Re-join the remainder so an album title that itself contains ' - '
        // is not truncated; yields '' when there is no album part.
        album = parts.slice(1).join(separator);
    return {
        artist: sanitize(artist),
        album: sanitize(album)
    };
}
/**
 * Builds the result record for one review entry.
 *
 * @param {{url: string, name: string}} obj Review entry from the search data.
 * @returns {{url: string, artist: string, album: string}} The entry's url
 *     together with the artist/album parsed from its name.
 */
function returnResult(obj) {
    var record = { url: obj.url },
        parsedName = normalizeP4kResult(obj.name);
    // Merge the parsed name fields onto the url-only record.
    return $.extend(record, parsedName);
}
/**
 * Success callback for the search request: picks the best review from the
 * response, shows its artist/album, and requests its score.
 *
 * Selection order: the only result if there is exactly one; otherwise the
 * first result whose artist and album both equal the searched terms (an empty
 * search term matches anything); otherwise the first result.
 *
 * @param {Object} data Response whose review groups are read from
 *     `data.value.items[0].json`.
 * @param {string} textStatus Unused; part of the jQuery callback signature.
 * @param {Object} xhr Unused; part of the jQuery callback signature.
 */
function getAlbumUrl(data, textStatus, xhr) {
    var items = data && data.value && data.value.items,
        searchData = (items && items[0] && items[0].json) || [],
        // Re-read the search terms here: the variables of the same name in
        // searchPitchfork are local to it and are not visible in this callback.
        artist = sanitize($('#artist').text()),
        album = sanitize($('#album').text()),
        reviews = {},
        theResult = {},
        i, m, group, p4kResult;

    // Find the reviews object
    for (i = 0, m = searchData.length; i < m; i++) {
        group = searchData[i];
        if (group && typeof group.label === 'string' && group.label.toLowerCase() === "reviews") {
            reviews = group;
            break;
        }
    }

    if ($.isEmptyObject(reviews) || $.isEmptyObject(reviews.objects)) {
        console.error('The search returned no reviews');
        return;
    }

    // We have results; a lone result may arrive as a bare object rather than
    // a one-element array, so wrap it.
    reviews = (reviews.objects.length > 0) ? reviews.objects : [reviews.objects];

    if (reviews.length > 1) {
        for (i = 0, m = reviews.length; i < m; i++) {
            p4kResult = normalizeP4kResult(reviews[i].name);
            if ((p4kResult.artist === artist || artist === '') &&
                    (p4kResult.album === album || album === '')) {
                // we found an exact match!
                theResult = returnResult(reviews[i]);
                break;
            }
        }
    }

    if ($.isEmptyObject(theResult)) {
        // Single result, or no exact match among several:
        // might as well use pitchfork's top match
        theResult = returnResult(reviews[0]);
    }

    // Populate album, artist data from search query
    $('#result_artist').text(theResult.artist);
    $('#result_album').text(theResult.album);

    // Get the album score
    getAlbumScore("http://pitchfork.com" + theResult.url);
}
/**
 * Requests the review page through a YQL query (as JSONP) and writes the
 * matched span content into the #score element.
 *
 * @param {string} url Absolute URL of the review page.
 */
function getAlbumScore(url) {
    // XPath selecting the span that is expected to hold the score.
    var xpath = '//div[@id="main"]/ul/li/div[@class="info"]/span',
        // Create YQL query to get span containing score
        query = encodeURIComponent('select content from html where url="' + url + '" and compat="html5" and xpath=\'' + xpath + '\''),
        // JSONP url for YQL query
        yqlurl = 'http://query.yahooapis.com/v1/public/yql?q=' + query + '&format=json&callback=?';
    $.ajax({
        url: yqlurl,
        type: 'GET',
        dataType: 'jsonp',
        success: function(data, textStatus, xhr) {
            var results = data && data.query && data.query.results;
            // When the XPath matches nothing there is no span to read;
            // report it instead of throwing inside the callback.
            if (!results || results.span === undefined || results.span === null) {
                console.error('No score found for ' + url);
                return;
            }
            $('#score').text(results.span);
        },
        error: function(xhr, textStatus, errorThrown) {
            console.error(xhr, textStatus, errorThrown);
        }
    });
}
/**
 * Reads the album and artist from the #album and #artist elements and sends
 * the search request through the Yahoo Pipe, which returns the response as
 * JSONP. Results are handled by getAlbumUrl.
 */
function searchPitchfork() {
    // Get album and artist from html
    var album = sanitize($('#album').text()),
        artist = sanitize($('#artist').text()),
        // Set up url to search pitchfork. The search term is encoded on its own
        // so characters such as '&', '#' or '+' in a name stay inside the
        // `query` parameter instead of corrupting the inner URL.
        p4kAC = "http://pitchfork.com/search/ac/?query=" + encodeURIComponent(album + ' - ' + artist),
        // Use this yahoo pipe to return JSONP from pitchfork's JSON
        yahooPipeId = '332d9216d8910ba39e6c2577fd321a6a';
    $.ajax({
        // The whole inner URL is encoded again because it travels as the
        // value of the pipe's `u` parameter.
        url: "http://pipes.yahoo.com/pipes/pipe.run?u=" + encodeURIComponent(p4kAC) + "&_id=" + yahooPipeId + "&_render=json&_callback=?",
        type: 'GET',
        dataType: 'jsonp',
        success: getAlbumUrl,
        error: function(xhr, textStatus, errorThrown) {
            console.error(xhr, textStatus, errorThrown);
        }
    });
}
// Run on dom ready: hand searchPitchfork to jQuery so the lookup starts once
// the #album and #artist elements it reads are available.
$(searchPitchfork);
})(jQuery);
view raw fiddle.js hosted with ❤ by GitHub

Demo

The demo currently pulls the artist and album search terms from the #artist and #album spans. Click the + button to edit the fiddle and try some of your own search terms.