The hardware and bandwidth for this mirror are donated by METANET, the Webhosting and Full Service-Cloud Provider.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]metanet.ch.

Discovering Public Health Data

library(mongolstats)
library(dplyr)
library(ggplot2)
# Set the mongolstats language option to English for the rest of the guide
nso_options(mongolstats.lang = "en")

# Shared plot theme; the explicit margins keep text from being cut off
theme_set(
  theme_minimal(base_size = 11) +
    theme(
      plot.title = element_text(face = "bold", size = 13),
      plot.subtitle = element_text(colour = "grey40", size = 10),
      legend.title = element_text(size = 10),
      legend.text = element_text(size = 9),
      plot.margin = margin(t = 10, r = 10, b = 10, l = 10)
    )
)

Overview

Mongolia’s National Statistics Office maintains comprehensive public health surveillance data. This guide demonstrates how to discover and access epidemiological data for research and policy analysis.

Finding Health Tables

Search by Keyword

Finding the right data is the first step in any analysis. The nso_itms_search() function allows you to query the entire NSO catalog using simple keywords:

# Infant and maternal health: show the first ten matching tables
mortality <- nso_itms_search("mortality")
head(select(mortality, tbl_id, tbl_eng_nm), n = 10)

# Cancer surveillance: table IDs with their English names
cancer <- nso_itms_search("cancer")
select(cancer, tbl_id, tbl_eng_nm)

# Communicable diseases, using tuberculosis as the search term
infectious <- nso_itms_search("tuberculosis")
select(infectious, tbl_id, tbl_eng_nm)

Browse by Sector

Health and education statistics are grouped together:

# List every sector in the catalog
sectors <- nso_sectors()
sectors

# Keep the sectors whose `text` mentions "health" (case-insensitive) and, when
# at least one matches, preview the subsectors of the first match
health_sector <- filter(sectors, grepl("health", text, ignore.case = TRUE))
if (nrow(health_sector) > 0) {
  subsectors <- nso_subsectors(health_sector$id[1])
  head(subsectors)
}

Case Study: Cancer Epidemiology

Exploring Cancer Incidence Data

Cancer burden is shifting in Mongolia. To understand these changes, we can analyze incidence trends over the last decade:

# Cancer incidence table: new cases per 10,000 population
cancer_tbl <- "DT_NSO_2100_012V1"

# Inspect the table's metadata
meta <- nso_table_meta(cancer_tbl)
meta

# Preview the first ten values of the cancer-type dimension
cancer_types <- nso_dim_values(
  cancer_tbl,
  "Type malignant neoplasms",
  labels = "en"
)
head(cancer_types, n = 10)

# Time coverage: the "Annual" dimension uses internal codes, so request codes
# and labels together and read the years from the English labels
annual_meta <- nso_dim_values(cancer_tbl, "Annual", labels = "both")
years <- annual_meta$label_en
years

Regional Disparities

# Infant mortality by aimag
imr_tbl <- "DT_NSO_2100_015V1" # IMR per 1,000 live births (Monthly)

# Table metadata and the values of the "Month" dimension
imr_meta <- nso_table_meta(imr_tbl)
months <- nso_dim_values(imr_tbl, "Month", labels = "en")

# Codes of every month whose English label mentions 2024
months_2024 <- months |>
  filter(grepl("2024", label_en)) |>
  pull(code)

# Fetch the 2024 months for all regions and average them per region.
# Region names from the API are trimmed and, where listed below, replaced by
# their romanisation with diacritics; unlisted names pass through unchanged.
imr_data <- nso_data(
  tbl_id = imr_tbl,
  selections = list(
    "Region" = nso_dim_values(imr_tbl, "Region")$code,
    "Month" = months_2024
  ),
  labels = "en"
) |>
  filter(nchar(Region) == 3) |> # Keep only Aimags and Ulaanbaatar
  mutate(
    Region_en = trimws(Region_en),
    Region_en = dplyr::case_match(
      Region_en,
      "Bayan-Ulgii" ~ "Bayan-Ölgii",
      "Uvurkhangai" ~ "Övörkhangai",
      "Khuvsgul" ~ "Khövsgöl",
      "Umnugovi" ~ "Ömnögovi",
      "Tuv" ~ "Töv",
      "Sukhbaatar" ~ "Sükhbaatar",
      .default = Region_en
    )
  ) |>
  group_by(Region_en) |>
  summarise(value = mean(value, na.rm = TRUE), .groups = "drop")

# Ten regions with the highest average IMR
imr_data |>
  arrange(desc(value)) |>
  select(Region_en, value) |>
  head(10)

Time Trend Analysis

# National monthly series: region code "0" selects the national total
imr_national <- nso_data(
  tbl_id = imr_tbl,
  selections = list("Region" = "0", "Month" = months$code),
  labels = "en"
)

# Plot the national infant mortality trend:
#   * Month_en ("YYYY-MM") becomes a Date on the first day of the month
#   * only 2019-2024 is kept so the recent trend is easy to read
imr_national |>
  mutate(date = as.Date(paste(Month_en, "01", sep = "-"))) |>
  filter(
    date >= as.Date("2019-01-01"),
    date <= as.Date("2024-12-31")
  ) |>
  ggplot(aes(x = date, y = value, group = 1)) +
  # Raw monthly values are dimmed so the smoothed trend stands out
  geom_line(colour = "#2980b9", linewidth = 1, alpha = 0.3) +
  geom_point(
    colour = "#2980b9", fill = "white",
    shape = 21, size = 2, stroke = 1, alpha = 0.5
  ) +
  # LOESS smoother reveals the underlying trend
  geom_smooth(
    method = "loess", se = TRUE,
    colour = "#2980b9", fill = "#2980b9",
    alpha = 0.2, linewidth = 1.5
  ) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
  # Start the y-axis at 0 to avoid exaggerating changes
  scale_y_continuous(
    limits = c(0, NA),
    expand = expansion(mult = c(0, 0.2))
  ) +
  labs(
    title = "Infant Mortality Rate Trend",
    subtitle = "Monthly Deaths per 1,000 live births (2019-2024)",
    caption = "Source: NSO Mongolia",
    x = NULL,
    y = "IMR (per 1,000 live births)"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(size = 16, face = "bold"),
    plot.subtitle = element_text(colour = "grey40"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank()
  )

Case Study: Tuberculosis Burden

Let’s analyze the trend in reported tuberculosis cases using monthly data.

# Monthly communicable-disease table:
# CASES OF COMMUNICABLE DISEASES, by type of selected diseases and by month
tb_tbl <- "DT_NSO_2100_035V1"

# The table's dimensions are "Indicators" and "Month"; look up the indicator
# code(s) whose English label mentions tuberculosis.
# NOTE(review): the pattern may match more than one indicator - confirm that
# a single code comes back before reading the plot as one series.
indicators <- nso_dim_values(tb_tbl, "Indicators", labels = "en")
tb_code <- pull(
  filter(indicators, grepl("Tuberculosis", label_en, ignore.case = TRUE)),
  code
)

# Fetch every available month for the selected indicator(s)
tb_data <- nso_data(
  tbl_id = tb_tbl,
  selections = list(
    "Indicators" = tb_code,
    "Month" = nso_dim_values(tb_tbl, "Month")$code
  ),
  labels = "en"
)

# Monthly tuberculosis cases: dimmed raw series plus a LOESS trend line
p <- tb_data |>
  mutate(date = as.Date(paste(Month_en, "01", sep = "-"))) |>
  filter(!is.na(value)) |>
  ggplot(aes(x = date, y = value, group = 1)) +
  # Raw data, dimmed
  geom_line(colour = "#2c3e50", linewidth = 1, alpha = 0.3) +
  geom_point(colour = "#2c3e50", size = 2, alpha = 0.3) +
  # Trend line
  geom_smooth(
    method = "loess", se = TRUE,
    colour = "#e74c3c", fill = "#e74c3c",
    alpha = 0.2, linewidth = 1.5
  ) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
  # Start the y-axis at 0
  scale_y_continuous(
    limits = c(0, NA),
    expand = expansion(mult = c(0, 0.2))
  ) +
  labs(
    title = "Tuberculosis Cases in Mongolia",
    subtitle = "Monthly reported cases",
    caption = "Source: NSO Mongolia (DT_NSO_2100_035V1)",
    x = NULL,
    y = "Number of Cases (Monthly)"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(size = 16, face = "bold"),
    plot.subtitle = element_text(colour = "grey40"),
    panel.grid.minor = element_blank()
  )

p # print the static ggplot

Biostatistical Note: This plot shows the number of reported cases, not the incidence rate. Trends should be interpreted with caution, as an increase in cases could be due to population growth or improved detection, rather than an increase in disease risk.

Tips for Epidemiological Research

  1. Always check time coverage: Use nso_table_periods() to verify data availability
  2. Use labels for clarity: Set labels = "en" to get readable dimension names
  3. Join multiple indicators: Combine tables to calculate derived metrics (e.g., case-fatality rates)
  4. Account for denominator data: Link disease counts with population data for rate calculations
  5. Regional analysis: Most health tables include breakdowns by aimag and soum for geographic analysis

Next Steps

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.