Source code for access_eval.analysis.constants

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from pathlib import Path
from typing import Callable, NamedTuple

###############################################################################


# Directory of packaged study data; it lives in "data" next to this module.
ACCESS_EVAL_2021_STUDY_DATA = Path(__file__).parent.joinpath("data")

# Individual files stored inside the packaged study-data directory.
ACCESS_EVAL_2021_WEB_SCRAPING = ACCESS_EVAL_2021_STUDY_DATA.joinpath(
    "web-scraping-candidates.csv"
)
ACCESS_EVAL_2021_ELECTION_RESULTS = ACCESS_EVAL_2021_STUDY_DATA.joinpath(
    "election-results.csv"
)
ACCESS_EVAL_2021_PRE_CONTACT_EVALS_ZIP = ACCESS_EVAL_2021_STUDY_DATA.joinpath(
    "pre-access-eval-results.zip"
)
ACCESS_EVAL_2021_POST_CONTACT_EVALS_ZIP = ACCESS_EVAL_2021_STUDY_DATA.joinpath(
    "post-access-eval-results.zip"
)

# Relative directory names (not anchored to the package directory), so they
# resolve against whatever the current working directory is when used.
ACCESS_EVAL_2021_PRE_CONTACT_EVALS_UNPACKED = Path(
    "unpacked-pre-access-eval-results"
)
ACCESS_EVAL_2021_POST_CONTACT_EVALS_UNPACKED = Path(
    "unpacked-post-access-eval-results"
)

# Combined dataset file, also stored inside the packaged study-data directory.
ACCESS_EVAL_2021_DATASET = ACCESS_EVAL_2021_STUDY_DATA.joinpath(
    "2021-study-data.csv"
)

###############################################################################


class ComputedField(NamedTuple):
    """
    Pairing of a derived field's name with the callable that produces it.
    """

    # Identifier of the computed field.
    name: str
    # Callable that produces the field's value(s); its signature is not
    # constrained by this type.
    func: Callable
class DatasetFields:
    """
    Data dictionary for the analysis dataset.

    Every attribute holds one column header as a string. The string literal
    placed directly under each attribute describes that column and lists
    example values.
    """

    location = "location"
    """
    str: The municipality or general location where the election took place.

    Examples
    --------
    - "Seattle, WA"
    - "New Orleans, LA"
    """

    campaign_website_url = "campaign_website_url"
    """
    str: The public URL for the campaign website.

    Examples
    --------
    - "https://www.google.com"
    - "https://evamaxfield.github.io"
    """

    electoral_position = "electoral_position"
    """
    str: The position the candidate was running for.

    Examples
    --------
    - "Mayor"
    - "Council"
    """

    candidate_position = "candidate_position"
    """
    str: Categorical value for whether the candidate is the incumbent,
    a challenger, or open.

    Examples
    --------
    - "Incumbent"
    - "Challenger"
    - "Open"
    """

    candidate_history = "candidate_history"
    """
    str: Categorical value for the electoral history of the candidate.

    Examples
    --------
    - "In-Office"
    - "Previously-Elected"
    - "Never-Held-Office"

    Notes
    -----
    Pulled from external data source.
    """

    election_result = "election_result"
    """
    str: Categorical value for whether the candidate won (or progressed) or not.

    Examples
    --------
    - "Won"
    - "Lost"

    Notes
    -----
    Pulled from external data source.
    """

    election_type = "election_type"
    """
    str: Categorical value for the type of election.

    Examples
    --------
    - "Primary"
    - "General"
    - "Runoff"
    """

    eligible_voting_population = "eligible_voting_population"
    """
    int: The total number of people eligible to vote in the election.

    Examples
    --------
    - 123456
    - 24680

    Notes
    -----
    Pulled from external data source.
    """

    number_of_votes_for_candidate = "number_of_votes_for_candidate"
    """
    int: The number of votes the candidate ultimately received.

    Examples
    --------
    - 12345
    - 2468

    Notes
    -----
    Pulled from external data source.
    """

    number_of_votes_for_race = "number_of_votes_for_race"
    """
    int: The total number of votes returned in the election.

    Examples
    --------
    - 123456
    - 24680

    Notes
    -----
    Pulled from external data source.
    """

    vote_share = "vote_share"
    """
    float: The number of votes the candidate received over the number of
    votes possible.

    Examples
    --------
    - 0.21
    - 0.47
    """

    race_funding = "race_funding"
    """
    float: The amount of money all candidates in the race received during
    the campaign.

    Examples
    --------
    - 10000000.00
    - 24500000.00

    Notes
    -----
    Pulled from external data source.
    """

    candidate_funding = "candidate_funding"
    """
    float: The amount of money the candidate received in donations during
    the campaign.

    Examples
    --------
    - 100000.00
    - 350000.00

    Notes
    -----
    Calculated as sum of all other candidates funding in same race.
    Pulled from external data.
    (Not all candidates had websites scraped)
    """

    funding_share = "funding_share"
    """
    float: The amount of money the candidate received in donations over the
    amount of money all candidates received during the campaign.

    Examples
    --------
    - 0.21
    - 0.47
    """

    contacted = "contacted"
    """
    str: Whether the campaign was contacted with the aXe evaluation
    summarization.

    Examples
    --------
    - "Contacted"
    - "Not-Contacted"

    Notes
    -----
    If the campaign was not contacted, the values for pre and post features
    are set to equal.
    """

    number_of_words = "number_of_words"
    """
    int: The total number of words found in the whole campaign website.
    Calculated on the latest version of the website.

    Examples
    --------
    - 9999
    - 12345
    """

    number_of_unique_words = "number_of_unique_words"
    """
    int: The total number of unique words found in the whole campaign website.
    Calculated on the latest version of the website.

    Examples
    --------
    - 999
    - 1234
    """

    ease_of_reading = "ease_of_reading"
    """
    float: The lexical complexity of the entire website.
    Calculated on the latest version of the website.

    See: https://github.com/shivam5992/textstat#the-flesch-reading-ease-formula
    for more information.

    Examples
    --------
    - 123.45
    - -12.34
    """

    number_of_pages_pre = "number_of_pages_pre"
    """
    int: The total number of pages found in the whole campaign website
    before contact.

    Examples
    --------
    - 12
    - 42
    """

    number_of_total_errors_pre = "number_of_total_errors_pre"
    """
    int: The total number of errors for the entire website before contact.

    Examples
    --------
    - 234
    - 450
    """

    number_of_critical_errors_pre = "number_of_critical_errors_pre"
    """
    int: The number of errors categorized as "critical" by aXe for the
    entire website before contact.

    Examples
    --------
    - 123
    - 42
    """

    number_of_serious_errors_pre = "number_of_serious_errors_pre"
    """
    int: The number of errors categorized as "serious" by aXe for the
    entire website before contact.

    Examples
    --------
    - 123
    - 42
    """

    number_of_moderate_errors_pre = "number_of_moderate_errors_pre"
    """
    int: The number of errors categorized as "moderate" by aXe for the
    entire website before contact.

    Examples
    --------
    - 123
    - 42
    """

    number_of_minor_errors_pre = "number_of_minor_errors_pre"
    """
    int: The number of errors categorized as "minor" by aXe for the
    entire website before contact.

    Examples
    --------
    - 123
    - 42
    """

    number_of_pages_post = "number_of_pages_post"
    """
    int: The total number of pages found in the whole campaign website
    after contact.

    Examples
    --------
    - 12
    - 42
    """

    number_of_total_errors_post = "number_of_total_errors_post"
    """
    int: The total number of errors for the entire website after contact.

    Examples
    --------
    - 234
    - 450
    """

    number_of_critical_errors_post = "number_of_critical_errors_post"
    """
    int: The number of errors categorized as "critical" by aXe for the
    entire website after contact.

    Examples
    --------
    - 123
    - 42
    """

    number_of_serious_errors_post = "number_of_serious_errors_post"
    """
    int: The number of errors categorized as "serious" by aXe for the
    entire website after contact.

    Examples
    --------
    - 123
    - 42
    """

    number_of_moderate_errors_post = "number_of_moderate_errors_post"
    """
    int: The number of errors categorized as "moderate" by aXe for the
    entire website after contact.

    Examples
    --------
    - 123
    - 42
    """

    number_of_minor_errors_post = "number_of_minor_errors_post"
    """
    int: The number of errors categorized as "minor" by aXe for the
    entire website after contact.

    Examples
    --------
    - 123
    - 42
    """

    trial = "trial"
    """
    str: The categorical variable added when the data has been flattened from
    "pre" and "post" having independent columns to now sharing columns.

    Examples
    --------
    - "Pre"
    - "Post"

    Notes
    -----
    This is only added with the flattened data.
    """

    error_type_x = "error_type_x"
    """
    int: There are many columns that begin with 'error-type_'.
    Such columns are just the aggregate value of that error type X for that
    campaign.

    Examples
    --------
    - "error-type_label_pre": 12
    - "error-type_frame-title_post": 4

    Notes
    -----
    These columns have a computed field as well which is the
    `avg_error-type_x` for both pre and post.
    """
def _post_minus_pre(post_field: str, pre_field: str) -> Callable:
    """Build a function returning ``data[post_field] - data[pre_field]``."""
    return lambda data: data[post_field] - data[pre_field]


def _quotient(numerator_field: str, denominator_field: str) -> Callable:
    """Build a function returning ``data[numerator_field] / data[denominator_field]``."""
    # No guard against a zero denominator: the result (or error) is whatever
    # the ``/`` operator of the indexed values produces.
    return lambda data: data[numerator_field] / data[denominator_field]


class ComputedFields:
    """
    Derived fields built from the ``DatasetFields`` columns.

    Each attribute is a ``ComputedField`` whose ``func`` takes a single
    ``data`` argument, indexes it by ``DatasetFields`` column names, and
    returns the combined value. ``data`` is presumably a pandas DataFrame
    or a single row of one -- confirm against callers.
    """

    # Differences (post-contact value minus pre-contact value)
    diff_pages = ComputedField(
        name="diff_pages",
        func=_post_minus_pre(
            DatasetFields.number_of_pages_post,
            DatasetFields.number_of_pages_pre,
        ),
    )
    diff_errors = ComputedField(
        name="diff_errors",
        func=_post_minus_pre(
            DatasetFields.number_of_total_errors_post,
            DatasetFields.number_of_total_errors_pre,
        ),
    )
    diff_critical_errors = ComputedField(
        name="diff_critical_errors",
        func=_post_minus_pre(
            DatasetFields.number_of_critical_errors_post,
            DatasetFields.number_of_critical_errors_pre,
        ),
    )
    diff_serious_errors = ComputedField(
        name="diff_serious_errors",
        func=_post_minus_pre(
            DatasetFields.number_of_serious_errors_post,
            DatasetFields.number_of_serious_errors_pre,
        ),
    )
    diff_moderate_errors = ComputedField(
        name="diff_moderate_errors",
        func=_post_minus_pre(
            DatasetFields.number_of_moderate_errors_post,
            DatasetFields.number_of_moderate_errors_pre,
        ),
    )
    diff_minor_errors = ComputedField(
        name="diff_minor_errors",
        func=_post_minus_pre(
            DatasetFields.number_of_minor_errors_post,
            DatasetFields.number_of_minor_errors_pre,
        ),
    )

    # Averages (error counts divided by the page count of the same trial)
    avg_errors_per_page_pre = ComputedField(
        name="avg_errors_per_page_pre",
        func=_quotient(
            DatasetFields.number_of_total_errors_pre,
            DatasetFields.number_of_pages_pre,
        ),
    )
    avg_errors_per_page_post = ComputedField(
        name="avg_errors_per_page_post",
        func=_quotient(
            DatasetFields.number_of_total_errors_post,
            DatasetFields.number_of_pages_post,
        ),
    )
    avg_critical_errors_per_page_pre = ComputedField(
        name="avg_critical_errors_per_page_pre",
        func=_quotient(
            DatasetFields.number_of_critical_errors_pre,
            DatasetFields.number_of_pages_pre,
        ),
    )
    avg_critical_errors_per_page_post = ComputedField(
        name="avg_critical_errors_per_page_post",
        func=_quotient(
            DatasetFields.number_of_critical_errors_post,
            DatasetFields.number_of_pages_post,
        ),
    )
    avg_serious_errors_per_page_pre = ComputedField(
        name="avg_serious_errors_per_page_pre",
        func=_quotient(
            DatasetFields.number_of_serious_errors_pre,
            DatasetFields.number_of_pages_pre,
        ),
    )
    avg_serious_errors_per_page_post = ComputedField(
        name="avg_serious_errors_per_page_post",
        func=_quotient(
            DatasetFields.number_of_serious_errors_post,
            DatasetFields.number_of_pages_post,
        ),
    )
    avg_moderate_errors_per_page_pre = ComputedField(
        name="avg_moderate_errors_per_page_pre",
        func=_quotient(
            DatasetFields.number_of_moderate_errors_pre,
            DatasetFields.number_of_pages_pre,
        ),
    )
    avg_moderate_errors_per_page_post = ComputedField(
        name="avg_moderate_errors_per_page_post",
        func=_quotient(
            DatasetFields.number_of_moderate_errors_post,
            DatasetFields.number_of_pages_post,
        ),
    )
    avg_minor_errors_per_page_pre = ComputedField(
        name="avg_minor_errors_per_page_pre",
        func=_quotient(
            DatasetFields.number_of_minor_errors_pre,
            DatasetFields.number_of_pages_pre,
        ),
    )
    avg_minor_errors_per_page_post = ComputedField(
        name="avg_minor_errors_per_page_post",
        func=_quotient(
            DatasetFields.number_of_minor_errors_post,
            DatasetFields.number_of_pages_post,
        ),
    )
    # Word count is divided by the post-contact page count.
    avg_number_of_words_per_page = ComputedField(
        name="avg_number_of_words_per_page",
        func=_quotient(
            DatasetFields.number_of_words,
            DatasetFields.number_of_pages_post,
        ),
    )

    # Vote share (divided by post-contact error counts)
    vote_share_per_error = ComputedField(
        name="vote_share_per_error",
        func=_quotient(
            DatasetFields.vote_share,
            DatasetFields.number_of_total_errors_post,
        ),
    )
    vote_share_per_critical_error = ComputedField(
        name="vote_share_per_critical_error",
        func=_quotient(
            DatasetFields.vote_share,
            DatasetFields.number_of_critical_errors_post,
        ),
    )
    vote_share_per_serious_error = ComputedField(
        name="vote_share_per_serious_error",
        func=_quotient(
            DatasetFields.vote_share,
            DatasetFields.number_of_serious_errors_post,
        ),
    )
    vote_share_per_moderate_error = ComputedField(
        name="vote_share_per_moderate_error",
        func=_quotient(
            DatasetFields.vote_share,
            DatasetFields.number_of_moderate_errors_post,
        ),
    )
    vote_share_per_minor_error = ComputedField(
        name="vote_share_per_minor_error",
        func=_quotient(
            DatasetFields.vote_share,
            DatasetFields.number_of_minor_errors_post,
        ),
    )