GET /api/v2/video/402
HTTP 200 OK Vary: Accept Content-Type: text/html; charset=utf-8 Allow: GET, PUT, PATCH, HEAD, OPTIONS
{ "category": "PyCon US 2011", "language": "English", "slug": "pycon-2011--handling-ridiculous-amounts-of-data-w", "speakers": [ "C. Titus Brown" ], "tags": [ "bigdata", "parallelization", "pycon", "pycon2011", "testing" ], "id": 402, "state": 1, "title": "Handling ridiculous amounts of data with probabilistic data structures", "summary": "", "description": "Handling ridiculous amounts of data with probabilistic data structures\n\nPresented by C. Titus Brown\n\nPart of my job as a scientist involves playing with rather large amounts of\ndata (200 gb+). In doing so we stumbled across some neat CS techniques that\nscale well, and are easy to understand and trivial to implement. These\ntechniques allow us to make some or many types of data analysis map-reducable.\nI'll talk about interesting implementation details, fun science, and neat\ncomputer science.\n\nAbstract\n\nIf an extreme talk, I will talk about interesting details/issues in:\n\n 1. Python as the backbone for a non-SciPy scientific software package: using Python as a frontend to C++ code, esp for parallelization and testing purposes. \n 2. Implementing probabilistic data structures with one-sided error as pre-filters for data retrieval and analysis, in ways that are generally useful. \n 3. Efficiently breaking down certain types of sparse graph problems using these probabilistic data structures, so that large graphs can be analyzed straightforwardly. This will be applied to plagiarism detection and/or duplicate code detection. \n\n", "quality_notes": "", "copyright_text": "Creative Commons Attribution-NonCommercial-ShareAlike 3.0", "embed": "", "thumbnail_url": "http://a.images.blip.tv/Pycon-PyCon2011HandlingRidiculousAmountsOfDataWithProbabilisti974.png", "duration": null, "video_ogv_length": 125025053, "video_ogv_url": null, "video_ogv_download_only": false, "video_mp4_length": null, "video_mp4_url": "http://05d2db1380b6504cc981-8cbed8cf7e3a131cd8f1c3e383d10041.r93.cf2.rackcdn.com/pycon-us-2011/402_handling-ridiculous-amounts-of-data-with-probabilistic-data-structures.mp4", "video_mp4_download_only": false, "video_webm_length": null, "video_webm_url": null, "video_webm_download_only": false, "video_flv_length": null, "video_flv_url": null, "video_flv_download_only": false, "source_url": "", "whiteboard": "", "recorded": "2011-03-11", "added": "2012-02-23T04:20:00", "updated": "2014-04-08T20:28:27.981" }