Python Class For 2011 Census Data

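This class retrieves 2011 Census data for a UK postcode from the ONS (Office for National Statistics) API. Example usage, with the code below saved as ONS.py: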

import ONS
census = ONS.Census("YOUR_API_KEY")
data = census.get_data("SW1A0AA", ONS.Data.religion, ONS.Area.Ward)
print(data)

The full source code of the module:

# Module author metadata.
__author__ = 'Joe Collins'

# NOTE: a shebang is only honoured when it is the very first line of a file,
# so in this position the line below is just an ordinary comment.
#! /usr/bin/env python
# based on https://gist.github.com/sammachin/671f90c15ec6331598e5 and
# http://digitalpublishing.ons.gov.uk/2014/08/07/ons-api-just-the-numbers/

import xml.etree.ElementTree as ElementTree
import json
import requests


class Area:
    """Level-type ids selecting the geographic resolution of a lookup.

    The chosen value is compared (as a string) with the ``LevelTypeId`` of
    each area returned for a postcode.
    """

    # Listed in ascending id order.
    Region = 11
    Authority = 13
    Ward = 14  # ward level data


class Data:
    """Dataset identifiers accepted by ``Census.get_data``.

    Each value is a string identifier from
    http://www.nomisweb.co.uk/census/2011/quick_statistics
    """

    # Listed alphabetically by attribute name.
    economic_activity = "QS601EW"
    ethnic_group = "QS201EW"
    industry = "QS605EW"
    population = "QS102EW"  # returned in segments; processed separately
    qualifications = "QS501EW"
    religion = "QS208EW"
    social_grade = "QS611EW"


class Census:
    """Look up 2011 Census figures for a UK postcode via the ONS web services.

    A lookup is a three step chain: postcode -> area id -> ext code -> data
    set. ``get_data`` runs the whole chain; the underscore-prefixed methods
    are the individual steps and are not part of the public interface.
    """

    # XML namespace used by the neighbourhood.statistics.gov.uk discovery
    # responses parsed in _get_area_id and _get_ext_code.
    _NAMESPACES = {'ns1': 'http://neighbourhood.statistics.gov.uk/nde/v1-0/discoverystructs'}

    def __init__(self, api_key, timeout=None):
        """ Create a client
        :param api_key: API key from the ONS, sent with every data request
        :param timeout: optional timeout in seconds handed to every
            ``requests.get`` call; None (the default) waits indefinitely
        """
        self.api_key = api_key
        self.timeout = timeout

    def _get_area_id(self, level_type, postcode):
        """ Get the area id for the postcode
        :param level_type: The resolution you are interested in. 14 = ward level data.
        :param postcode: A UK postcode
        :return: string area identifier, or '' when no area of that level is returned
        """
        base_url = "http://neighbourhood.statistics.gov.uk/NDE2/Disco/FindAreas"
        payload = {'HierarchyId': '27', 'Postcode': postcode}
        response = requests.get(base_url, params=payload, timeout=self.timeout)
        xml = ElementTree.fromstring(response.content)
        wanted_level = str(level_type)  # element text is a string, so compare as strings
        area_id = ''
        # No early exit: if several areas share the level, the last one wins.
        for area in xml.findall('.//ns1:Area', self._NAMESPACES):
            if area.find('ns1:LevelTypeId', self._NAMESPACES).text == wanted_level:
                area_id = area.find('ns1:AreaId', self._NAMESPACES).text
        return area_id

    def _get_ext_code(self, area_id):
        """ Get the ext code from an area id
        :param area_id: the area id for a postcode
        :return: the ext code for an area (presumably the GSS code - unconfirmed)
        """
        base_url = "http://neighbourhood.statistics.gov.uk/NDE2/Disco/GetAreaDetail"
        payload = {'AreaId': area_id}
        response = requests.get(base_url, params=payload, timeout=self.timeout)
        xml = ElementTree.fromstring(response.content)
        return xml.find('.//ns1:ExtCode', self._NAMESPACES).text

    def _get_data(self, data_set, geog_code):
        """ Get the data for a geographical code
        :param data_set: string identifier from http://www.nomisweb.co.uk/census/2011/quick_statistics
        :param geog_code: the ext code for the geographical area
        :return: the decoded JSON response (requested in json-stat form)
        """
        base_url = "http://data.ons.gov.uk/ons/api/data/dataset/" + data_set + ".json"
        # NOTE(review): the geography is fixed to '2011WARDH' even when the
        # caller asked get_data for a different Area level - confirm that
        # non-ward lookups behave as intended.
        payload = {'apikey': self.api_key, 'context': 'Census', 'geog': '2011WARDH',
                   'dm/2011WARDH': geog_code, 'totals': 'false', 'jsontype': 'json-stat'}
        response = requests.get(base_url, params=payload, timeout=self.timeout)
        return json.loads(response.text)

    def _process(self, json_object, data_set):
        """ Flatten one data set of the response into a label -> value dict
        :param json_object: decoded response as returned by _get_data
        :param data_set: key of the data set inside json_object
        :return: dict mapping each category label to its value
        """
        table = json_object[data_set]
        values = table['value']
        # The second dimension id names the code list holding the categories.
        code_list_id = table['dimension']['id'][1]
        category = table['dimension'][code_list_id]['category']
        index = category['index']
        labels = category['label']
        # 'index' gives each category code its position; 'value' is keyed by
        # that position written as a string.
        return {labels[code]: values[str(index[code])] for code in labels}

    def _process_population(self, json_object, data_set):
        """ The population data comes in separate segments so needs processing in a slightly different way
        :param json_object: decoded response as returned by _get_data
        :param data_set: data set identifier; top-level keys containing
            '<data_set> Segment' are treated as segments
        :return: dict mapping each segment's label to its value at key '0'
        """
        marker = data_set + ' Segment'
        data = {}
        for key in json_object:
            if marker not in key:
                continue
            segment = json_object[key]
            code_list_id = segment['dimension']['id'][1]
            label = segment['dimension'][code_list_id]['label']
            data[label] = segment['value']['0']
        return data

    def get_data(self, postcode, data_set, geographical_area):
        """ Fetch a data set for the area containing a postcode
        :param postcode: A UK postcode
        :param data_set: one of the Data identifiers
        :param geographical_area: one of the Area level types
        :return: dict of label -> value; empty when no matching area is found
        """
        area_id = self._get_area_id(geographical_area, postcode)
        if not area_id:  # no area at that level, e.g. the postcode isn't in England or Wales
            return {}
        gss_code = self._get_ext_code(area_id)
        data_returned = self._get_data(data_set, gss_code)
        if data_set == Data.population:  # Population has to be dealt with differently
            return self._process_population(data_returned, data_set)
        return self._process(data_returned, data_set)
Written on May 20, 2015