{ "category": "PyCon US 2010", "language": "English", "slug": "pycon-2010--the-python-and-the-elephant--large-sc", "speakers": [ "Dr. Jimmy J Lin", "Nitin Madnani" ], "tags": [ "dumbo", "nltk", "pycon", "pycon2010" ], "id": 335, "state": 1, "title": "The Python and the Elephant: Large Scale Natural Language Processing with NLTK and Dumbo (#120)", "summary": "", "description": "The Python and the Elephant: Large Scale Natural Language Processing with NLTK\nand Dumbo\n\n \nPresented by Nitin Madnani (University of Maryland, College Park); Dr. Jimmy J\nLin (University of Maryland)\n\n \nA practical look at NLTK and Dumbo, python-powered and open-source toolkits\nand APIs for processing natural language on a large scale.\n\n \nFor people like us who make a living trying to make a computer \"understand\"\nhuman language, Python is a very powerful language, given its rapid\nprototyping abilities, native unicode support and a stellar standard library.\nThis relationship has been strengthened further by an open-source,\npython-based Natural Language ToolKit (NLTK), which is\nbeing widely used in the community for both teaching and research purposes and\ngaining traction in the general Python community as well.\nRecently, the Python\ncommunity has seen the release of Dumbo, an open-source, python-based\ncloud-computing API (based on Hadoop) via the hands of Klaas Bosteels.\n\n \nIn this talk, we show how the amalgamation of Python, NLTK and Dumbo can allow\nfor very large-scale natural language processing efficiently and elegantly.\n\n", "quality_notes": "", "copyright_text": "Creative Commons Attribution-NonCommercial-ShareAlike 3.0", "embed": "", "thumbnail_url": "", "duration": null, "video_ogv_length": null, "video_ogv_url": "", "video_ogv_download_only": false, "video_mp4_length": null, "video_mp4_url": "", "video_mp4_download_only": false, "video_webm_length": null, "video_webm_url": null, "video_webm_download_only": false, "video_flv_length": null, 
"video_flv_url": null, "video_flv_download_only": false, "source_url": "", "whiteboard": "", "recorded": "2010-02-19", "added": "2012-02-23T04:20:00", "updated": "2014-04-08T20:28:28.214" }