GET /api/v2/video/404
HTTP 200 OK Vary: Accept Content-Type: text/html; charset=utf-8 Allow: GET, PUT, PATCH, HEAD, OPTIONS
{ "category": "PyCon US 2011", "language": "English", "slug": "pycon-2011--mrjob--distributed-computing-for-ever", "speakers": [ "Jimmy Retzlaff" ], "tags": [ "distributed", "distributedcomputing", "mrjob", "pycon", "pycon2011" ], "id": 404, "state": 1, "title": "mrjob: Distributed Computing for Everyone", "summary": "", "description": "mrjob: Distributed Computing for Everyone\n\nPresented by Jimmy Retzlaff\n\nHave tons of data that needs analysis? Now it's as easy as 1-2-3! 1) Sign up\nfor an Amazon Web Services account. 2) Install Yelp's mrjob. 3) Write as few\nas a dozen lines of Python code. This talk will show you how to use mrjob and\nAmazon's Elastic MapReduce to easily process lots of data in parallel on a\npotentially large cluster of computers that you can rent for a dime per\ncomputer per hour.\n\nAbstract\n\nIn their 2004 paper, Google outlined MapReduce - one of the programming models\nthey use to process large data sets. MapReduce is a relatively simple model to\ndevelop for that allows the underlying framework to automatically parallelize\nthe job, add fault tolerance, and scale the job to many commodity computers.\n\nIn 2009, Amazon Web Services introduced their Elastic MapReduce (EMR) product.\nIt layers the Hadoop open source package on top of their Elastic Compute Cloud\n(EC2) to allow anyone to rent a cluster of computers by the hour, starting at\nabout a dime per computer per hour, in order to run MapReduce jobs.\n\nSome of the significant issues with Amazon's solution involve starting up\nmachine instances, replicating your code and its dependencies to EMR, running\nand monitoring the job, and gathering the results.\n\nSo Yelp developed mrjob, which takes care of these details and lets the\ndeveloper focus on working with their data. Yelp uses mrjob to power many\ninternal jobs that work with its very large log files, for example:\n\n * People Who Viewed This Also Viewed... 
\n * A user clicked an ad over and over, but we only want to charge the advertiser once \n * We're thinking of a change, but want to simulate how that will affect ad revenue \n\nNow you can use that same power with just a few lines of Python.\n\nUseful links:\n\n * Install mrjob: sudo easy_install mrjob \n * Documentation: [http://packages.python.org/mrjob/](http://packages.python.org/mrjob/)\n * PyPI: [http://pypi.python.org/pypi/mrjob](http://pypi.python.org/pypi/mrjob)\n * Development is hosted at github: [http://github.com/Yelp/mrjob](http://github.com/Yelp/mrjob)\n\n", "quality_notes": "", "copyright_text": "Creative Commons Attribution-NonCommercial-ShareAlike 3.0", "embed": "", "thumbnail_url": "http://a.images.blip.tv/Pycon-PyCon2011MrjobDistributedComputingForEveryone940.png", "duration": null, "video_ogv_length": 142384088, "video_ogv_url": null, "video_ogv_download_only": false, "video_mp4_length": null, "video_mp4_url": "http://05d2db1380b6504cc981-8cbed8cf7e3a131cd8f1c3e383d10041.r93.cf2.rackcdn.com/pycon-us-2011/404_mrjob-distributed-computing-for-everyone.mp4", "video_mp4_download_only": false, "video_webm_length": null, "video_webm_url": null, "video_webm_download_only": false, "video_flv_length": null, "video_flv_url": null, "video_flv_download_only": false, "source_url": "", "whiteboard": "", "recorded": "2011-03-11", "added": "2012-02-23T04:20:00", "updated": "2014-04-08T20:28:28.001" }