{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Margins of error in the ACS\n", "\n", "[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mmann1123/pytidycensus/blob/main/examples/03_margins_of_error.ipynb)\n", "\n", "Understanding and working with uncertainty in American Community Survey data." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pytidycensus as tc\n", "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Census API Key\n", "\n", "To use pytidycensus, you need a free API key from the US Census Bureau. Get one at: https://api.census.gov/data/key_signup.html\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "\n", "# tc.set_census_api_key(\"Your API Key Here\") " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "You can ignore the next cell. It reads my API key from the `CENSUS_API_KEY` environment variable and falls back to a placeholder key when none is set (for example, during documentation builds)."
] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Census API key has been set for this session.\n", "Using Census API key from environment\n" ] } ], "source": [
 "import os\n",
 "\n",
 "# Read the key from the environment so it never has to be stored in the notebook\n",
 "api_key = os.environ.get(\"CENSUS_API_KEY\")\n",
 "\n",
 "if api_key:\n",
 "    tc.set_census_api_key(api_key)\n",
 "    print(\"Using Census API key from environment\")\n",
 "else:\n",
 "    # No key available (e.g. a documentation build): register a placeholder so\n",
 "    # this cell still runs. NOTE(review): nothing here mocks responses, so later\n",
 "    # API calls presumably still need a valid key - confirm.\n",
 "    print(\"Using example API key for documentation\")\n",
 "    tc.set_census_api_key(\"EXAMPLE_API_KEY_FOR_DOCS\")"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Understanding ACS Uncertainty\n", "\n", "Unlike decennial Census counts, ACS data are estimates with margins of error." ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Getting data from the 2018-2022 5-year ACS\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
GEOIDB01001_020EB01001_021EB01001_022EB01001_023EB01001_024EB01001_025EB01001_044EB01001_045EB01001_046E...B01001_022_moeB01001_023_moeB01001_024_moeB01001_025_moeB01001_044_moeB01001_045_moeB01001_046_moeB01001_047_moeB01001_048_moeB01001_049_moe
0271230301007057391112234598976...25.040.030.026.058.040.041.047.065.049.0
127123030201138672297119844411884...148.070.033.050.040.066.059.041.030.060.0
227123030202111538110121800...23.017.09.021.019.09.09.09.09.09.0
327123030300301061839419414739203...87.055.031.032.040.046.089.060.039.024.0
4271230304007711958701118086...44.012.013.013.023.013.062.013.010.031.0
\n", "

5 rows × 29 columns

\n", "
" ], "text/plain": [ " GEOID B01001_020E B01001_021E B01001_022E B01001_023E \\\n", "0 27123030100 70 57 39 111 \n", "1 27123030201 138 67 229 71 \n", "2 27123030202 11 15 38 11 \n", "3 27123030300 30 106 183 94 \n", "4 27123030400 77 119 58 7 \n", "\n", " B01001_024E B01001_025E B01001_044E B01001_045E B01001_046E ... \\\n", "0 22 34 59 89 76 ... \n", "1 19 84 44 118 84 ... \n", "2 0 12 18 0 0 ... \n", "3 19 41 47 39 203 ... \n", "4 0 11 18 0 86 ... \n", "\n", " B01001_022_moe B01001_023_moe B01001_024_moe B01001_025_moe \\\n", "0 25.0 40.0 30.0 26.0 \n", "1 148.0 70.0 33.0 50.0 \n", "2 23.0 17.0 9.0 21.0 \n", "3 87.0 55.0 31.0 32.0 \n", "4 44.0 12.0 13.0 13.0 \n", "\n", " B01001_044_moe B01001_045_moe B01001_046_moe B01001_047_moe \\\n", "0 58.0 40.0 41.0 47.0 \n", "1 40.0 66.0 59.0 41.0 \n", "2 19.0 9.0 9.0 9.0 \n", "3 40.0 46.0 89.0 60.0 \n", "4 23.0 13.0 62.0 13.0 \n", "\n", " B01001_048_moe B01001_049_moe \n", "0 65.0 49.0 \n", "1 30.0 60.0 \n", "2 9.0 9.0 \n", "3 39.0 24.0 \n", "4 10.0 31.0 \n", "\n", "[5 rows x 29 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Example: aging populations in Ramsey County, MN\n",
 "# Table B01001 (sex by age): _020-_025 are males 65+, _044-_049 are females 65+\n",
 "age_vars = [f\"B01001_{i:03d}\" for i in (*range(20, 26), *range(44, 50))]\n",
 "\n",
 "ramsey = tc.get_acs(\n",
 "    geography=\"tract\",\n",
 "    variables=age_vars,\n",
 "    state=\"MN\",\n",
 "    county=\"Ramsey\",\n",
 "    year=2022,\n",
 "    output=\"wide\",\n",
 ")\n",
 "ramsey.head()"
 ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Cases where margin of error exceeds estimate: 'GEOID'\n", "2 27123030202\n", "3 27123030300\n", "4 27123030400\n", "6 27123030601\n", "8 27123030702\n", "Name: GEOID, dtype: object\n" ] } ], "source": [
 "# Show cases where the MOE exceeds the estimate, using one variable as an example.\n",
 "# The ratio is kept in a local Series so the `ramsey` frame shown above is not mutated.\n",
 "moe_ratio = ramsey[\"B01001_020_moe\"] / ramsey[\"B01001_020E\"]\n",
 "\n",
 "print(\"Cases where margin of error exceeds estimate: 'GEOID'\")\n",
 "print(ramsey.loc[moe_ratio > 1, \"GEOID\"].head())"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Aggregating Data and MOE Calculations\n", "\n", "When combining estimates, we need to properly calculate the margin of error. For a sum of estimates, the Census Bureau's approximation is $MOE_{sum} = \\sqrt{\\sum_i MOE_i^2}$." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "def moe_sum(moes, estimates=None):\n",
 "    \"\"\"Approximate the margin of error for a sum of ACS estimates.\n",
 "\n",
 "    The component MOEs are combined as the square root of the sum of their\n",
 "    squares. If `estimates` is given and some of them are zero, only the\n",
 "    largest MOE among the zero estimates is counted (once), following Census\n",
 "    Bureau guidance, so repeated zero cells do not inflate the result.\n",
 "\n",
 "    Parameters\n",
 "    ----------\n",
 "    moes : array-like of float\n",
 "        Margins of error of the estimates being summed.\n",
 "    estimates : array-like of float, optional\n",
 "        Estimates matching `moes` element for element.\n",
 "\n",
 "    Returns\n",
 "    -------\n",
 "    float\n",
 "        Margin of error of the summed estimate.\n",
 "    \"\"\"\n",
 "    moes = np.asarray(moes, dtype=float)\n",
 "    if estimates is not None:\n",
 "        is_zero = np.asarray(estimates, dtype=float) == 0\n",
 "        if is_zero.any():\n",
 "            moes = np.append(moes[~is_zero], moes[is_zero].max())\n",
 "    return float(np.sqrt(np.sum(moes**2)))\n",
 "\n",
 "\n",
 "# Wide output: estimate columns end in \"E\", margin-of-error columns end in \"_moe\"\n",
 "estimate_cols = [f\"{var}E\" for var in age_vars]\n",
 "moe_cols = [f\"{var}_moe\" for var in age_vars]\n",
 "\n",
 "# Aggregate population over 65 by tract: add the age/sex groups across each row\n",
 "ramsey_65plus = (\n",
 "    ramsey[[\"GEOID\"]]\n",
 "    .assign(\n",
 "        estimate=ramsey[estimate_cols].sum(axis=1),\n",
 "        moe=[\n",
 "            moe_sum(row_moes, row_estimates)\n",
 "            for row_moes, row_estimates in zip(\n",
 "                ramsey[moe_cols].to_numpy(), ramsey[estimate_cols].to_numpy()\n",
 "            )\n",
 "        ],\n",
 "    )\n",
 "    .set_index(\"GEOID\")\n",
 ")\n",
 "\n",
 "print(\"Aggregated estimates with proper MOE calculation:\")\n",
 "ramsey_65plus.head()"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Visualization with Confidence Intervals" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "# Error-bar plot: each point is a tract's 65+ estimate, whiskers are +/- its MOE\n",
 "fig, ax = plt.subplots(figsize=(12, 8))\n",
 "\n",
 "sample_data = ramsey_65plus.head(10)\n",
 "positions = list(range(len(sample_data)))\n",
 "\n",
 "ax.errorbar(\n",
 "    positions,\n",
 "    sample_data[\"estimate\"],\n",
 "    yerr=sample_data[\"moe\"],\n",
 "    fmt=\"o\",\n",
 "    capsize=5,\n",
 "    capthick=2,\n",
 ")\n",
 "ax.set_xlabel(\"Census Tract\")\n",
 "ax.set_ylabel(\"Population 65+\")\n",
 "ax.set_title(\"Population 65+ by Census Tract with Margins of Error\")\n",
 "ax.set_xticks(positions)\n",
 "ax.set_xticklabels([f\"Tract {i+1}\" for i in positions], rotation=45)\n",
 "fig.tight_layout()\n",
 "plt.show()"
 ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "test2", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.13" } }, "nbformat": 4, "nbformat_minor": 4 }