Discovering Public Health Data

The hardware and bandwidth for this mirror are donated by METANET, the Webhosting and Full Service-Cloud Provider.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]metanet.ch.

Overview

Mongolia’s National Statistics Office maintains comprehensive public health surveillance data. This guide demonstrates how to discover and access epidemiological data for research and policy analysis.

Finding Health Tables

Search by Keyword

Finding the right data is the first step in any analysis. The nso_itms_search() function allows you to query the entire NSO catalog using simple keywords:

# Mortality tables (infant and maternal health): show the first 10 matches,
# keeping only the tbl_id and tbl_eng_nm columns
mortality <- nso_itms_search("mortality")
head(select(mortality, tbl_id, tbl_eng_nm), 10)

# Cancer surveillance tables
cancer <- nso_itms_search("cancer")
select(cancer, tbl_id, tbl_eng_nm)

# Communicable diseases, searched via the keyword "tuberculosis"
infectious <- nso_itms_search("tuberculosis")
select(infectious, tbl_id, tbl_eng_nm)

Browse by Sector

Health and education statistics are grouped together:

# List every sector in the catalog
sectors <- nso_sectors()
sectors

# Keep the sectors whose `text` mentions "health" (case-insensitive), then
# preview the subsectors of the first match; nothing is shown if none match
health_sector <- filter(sectors, grepl("health", text, ignore.case = TRUE))
if (nrow(health_sector) > 0) {
  subsectors <- nso_subsectors(health_sector$id[1])
  head(subsectors)
}

Case Study: Cancer Epidemiology

Exploring Cancer Incidence Data

Cancer burden is shifting in Mongolia. To understand these changes, we can analyze incidence trends over the last decade:

# Table ID for cancer incidence (new cases per 10,000 population)
cancer_tbl <- "DT_NSO_2100_012V1"

# Inspect the dimensions this table exposes
meta <- nso_table_meta(cancer_tbl)
meta

# Preview the first 10 cancer types, with English labels
cancer_types <- nso_dim_values(
  cancer_tbl,
  "Type malignant neoplasms",
  labels = "en"
)
head(cancer_types, 10)

# Time coverage: the "Annual" dimension uses internal codes, so request both
# codes and labels and read the years off the English labels
annual_meta <- nso_dim_values(cancer_tbl, "Annual", labels = "both")
years <- annual_meta[["label_en"]]
years

Fetching and Visualizing Cancer Trends

# Plot recent incidence trends for four major cancer types
# (Lung, Liver, Stomach, Cervix) over the 10 most recent years

# Step 1: Codes of the 10 most recent years. Labels are sorted ascending,
# so the latest years are the last 10 rows.
recent_years <- pull(
  tail(arrange(annual_meta, label_en), 10),
  code
)

# Step 2: Fetch the incidence series for the selected types and years
cancer_data <- nso_data(
  tbl_id = cancer_tbl,
  selections = list(
    # Codes "1"-"4": Lung, Liver, Stomach, Cervix
    "Type malignant neoplasms" = c("1", "2", "3", "4"),
    "Annual" = recent_years
  ),
  labels = "en"
)

# Step 3: Build the static line chart, one colored line per cancer type
p <- ggplot(
  cancer_data,
  aes(
    x = as.integer(Annual_en),
    y = value,
    color = `Type malignant neoplasms_en`,
    group = `Type malignant neoplasms_en`
  )
) +
  geom_line(linewidth = 1.2) +
  # Hollow points stand out on the lines
  geom_point(size = 3, shape = 21, fill = "white", stroke = 1.5) +
  labs(
    title = "Cancer Incidence Trends in Mongolia",
    subtitle = "New cases per 10,000 population (Recent Trends)",
    x = NULL,
    y = "Incidence Rate (per 10,000)",
    color = "Cancer Type",
    caption = "Source: NSO Mongolia"
  ) +
  # Colorblind-friendly discrete palette
  scale_color_viridis_d(option = "plasma", end = 0.9) +
  # One axis break per whole year inside the plotted range
  scale_x_continuous(
    breaks = \(x) seq(ceiling(min(x)), floor(max(x)), by = 1)
  ) +
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "top",
    plot.title = element_text(face = "bold", size = 16),
    plot.subtitle = element_text(color = "grey40", margin = margin(b = 10)),
    axis.text = element_text(color = "grey30"),
    panel.grid.minor = element_blank(),
    # Vertical gridlines clutter multi-line plots
    panel.grid.major.x = element_blank()
  )

p # print the static ggplot

Regional Disparities

# Infant mortality by aimag: average the 2024 monthly IMR per region and
# rank the regions by that average.
imr_tbl <- "DT_NSO_2100_015V1" # IMR per 1,000 live births (Monthly)

# Table metadata and the available months (English labels)
imr_meta <- nso_table_meta(imr_tbl)
months <- nso_dim_values(imr_tbl, "Month", labels = "en")

# Codes of every month whose label contains "2024"
months_2024 <- months |>
  filter(grepl("2024", label_en)) |>
  pull(code)

# Fetch all regions for the 2024 months, then reduce to one mean per region
imr_data <- nso_data(
  tbl_id = imr_tbl,
  selections = list(
    "Region" = nso_dim_values(imr_tbl, "Region")$code,
    "Month" = months_2024
  ),
  labels = "en"
) |>
  filter(nchar(Region) == 3) |> # Keep only Aimags and Ulaanbaatar
  mutate(
    # Strip padding before matching so the lookups below compare clean names
    Region_en = trimws(Region_en),
    # Harmonize romanized aimag names to their spellings with diacritics;
    # names without a mapping are kept unchanged
    Region_en = dplyr::case_match(
      Region_en,
      "Bayan-Ulgii" ~ "Bayan-Ölgii",
      "Uvurkhangai" ~ "Övörkhangai",
      "Khuvsgul" ~ "Khövsgöl",
      "Umnugovi" ~ "Ömnögovi",
      "Tuv" ~ "Töv",
      "Sukhbaatar" ~ "Sükhbaatar",
      .default = Region_en
    )
  ) |>
  group_by(Region_en) |>
  # Mean of the monthly values, ignoring missing months
  summarise(value = mean(value, na.rm = TRUE), .groups = "drop")

# Find regions with highest IMR (top 10 by 2024 average)
imr_data |>
  arrange(desc(value)) |>
  select(Region_en, value) |>
  head(10)

Time Trend Analysis

# National infant mortality trend from the monthly series

# Fetch every available month for the national total (region code "0")
imr_national <- nso_data(
  tbl_id = imr_tbl,
  selections = list("Region" = "0", "Month" = months$code),
  labels = "en"
)

# Month_en is formatted "YYYY-MM"; appending "-01" gives a parseable date.
# Only 2019-2024 is kept so the recent trend is easy to read.
imr_national |>
  mutate(date = as.Date(paste0(Month_en, "-01"))) |>
  filter(
    date >= as.Date("2019-01-01"),
    date <= as.Date("2024-12-31")
  ) |>
  ggplot(aes(x = date, y = value, group = 1)) +
  # Raw monthly values are dimmed so the smoothed trend stands out
  geom_line(color = "#2980b9", linewidth = 1, alpha = 0.3) +
  geom_point(
    color = "#2980b9", size = 2, shape = 21,
    fill = "white", stroke = 1, alpha = 0.5
  ) +
  # LOESS smoother reveals the underlying trend
  geom_smooth(
    method = "loess", se = TRUE,
    color = "#2980b9", fill = "#2980b9",
    alpha = 0.2, linewidth = 1.5
  ) +
  labs(
    title = "Infant Mortality Rate Trend",
    subtitle = "Monthly Deaths per 1,000 live births (2019-2024)",
    x = NULL,
    y = "IMR (per 1,000 live births)",
    caption = "Source: NSO Mongolia"
  ) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
  # The y-axis starts at 0 to avoid exaggerating changes
  scale_y_continuous(
    limits = c(0, NA),
    expand = expansion(mult = c(0, 0.2))
  ) +
  theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(face = "bold", size = 16),
    plot.subtitle = element_text(color = "grey40"),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank()
  )

Case Study: Tuberculosis Burden

Let’s analyze seasonal trends in tuberculosis using monthly data.

# Monthly tuberculosis case counts
# Table: CASES OF COMMUNICABLE DISEASES, by type of selected diseases and by month
tb_tbl <- "DT_NSO_2100_035V1"

# The table's dimensions are "Indicators" and "Month". Look up the indicator
# code(s) whose English label mentions tuberculosis (case-insensitive).
# NOTE(review): the pattern may match more than one indicator; every match is
# requested and drawn as a single series (group = 1) - confirm only one matches.
indicators <- nso_dim_values(tb_tbl, "Indicators", labels = "en")
tb_code <- indicators$code[
  grepl("Tuberculosis", indicators$label_en, ignore.case = TRUE)
]

# Fetch every available month for the matched indicator(s)
tb_data <- nso_data(
  tbl_id = tb_tbl,
  selections = list(
    "Indicators" = tb_code,
    "Month" = nso_dim_values(tb_tbl, "Month")$code
  ),
  labels = "en"
)

# Build the monthly plot: dimmed raw counts plus a LOESS trend line
p <- tb_data |>
  mutate(date = as.Date(paste0(Month_en, "-01"))) |>
  filter(!is.na(value)) |>
  ggplot(aes(x = date, y = value, group = 1)) +
  # Raw data is dimmed
  geom_line(color = "#2c3e50", linewidth = 1, alpha = 0.3) +
  geom_point(color = "#2c3e50", size = 2, alpha = 0.3) +
  # Trend line
  geom_smooth(
    method = "loess", se = TRUE,
    color = "#e74c3c", fill = "#e74c3c",
    alpha = 0.2, linewidth = 1.5
  ) +
  labs(
    title = "Tuberculosis Cases in Mongolia",
    subtitle = "Monthly reported cases",
    x = NULL,
    y = "Number of Cases (Monthly)",
    caption = "Source: NSO Mongolia (DT_NSO_2100_035V1)"
  ) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
  # Start the y-axis at 0
  scale_y_continuous(
    limits = c(0, NA),
    expand = expansion(mult = c(0, 0.2))
  ) +
  theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(face = "bold", size = 16),
    plot.subtitle = element_text(color = "grey40"),
    panel.grid.minor = element_blank()
  )

p # print the static ggplot

Biostatistical Note: This plot shows the number of reported cases, not the incidence rate. Trends should be interpreted with caution, as an increase in cases could be due to population growth or improved detection, rather than an increase in disease risk.

Tips for Epidemiological Research

Always check time coverage: Use nso_table_periods() to verify data availability
Use labels for clarity: Set labels = "en" to get readable dimension names
Join multiple indicators: Combine tables to calculate derived metrics (e.g., case-fatality rates)
Account for denominator data: Link disease counts with population data for rate calculations
Regional analysis: Most health tables include breakdowns by aimag and soum for geographic analysis

Next Steps

Mapping Health Outcomes: See the Mapping Guide for spatial epidemiology
Reference Documentation: Explore all available functions in the Reference

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.