\newcommand{\NWtarget}[2]{\hypertarget{#1}{#2}}
\newcommand{\NWlink}[2]{\hyperlink{#1}{#2}}
\newcommand{\NWtxtMacroDefBy}{Fragment defined by}
\newcommand{\NWtxtMacroRefIn}{Fragment referenced in}
\newcommand{\NWtxtMacroNoRef}{Fragment never referenced}
\newcommand{\NWtxtDefBy}{Defined by}
\newcommand{\NWtxtRefIn}{Referenced in}
\newcommand{\NWtxtNoRef}{Not referenced}
\newcommand{\NWtxtFileDefBy}{File defined by}
\newcommand{\NWtxtIdentsUsed}{Uses:}
\newcommand{\NWtxtIdentsNotUsed}{Never used}
\newcommand{\NWtxtIdentsDefed}{Defines:}
\newcommand{\NWsep}{${\diamond}$}
\newcommand{\NWnotglobal}{(not defined globally)}
\newcommand{\NWuseHyperlinks}{}
\documentclass[a4paper]{report}

%\VignetteIndexEntry{Stratified K-sample Inference}
%\VignetteDepends{multcomp,survival,Hmisc,coin,rms,latticeExtra,daewr}
%\VignetteKeywords{semiparametric model,conditional inference}}
%\VignettePackage{free1way.docreg}

%% packages
\usepackage{amsfonts,amstext,amsmath,amssymb,amsthm}

\usepackage[utf8]{inputenc}

\newif\ifshowcode
\showcodetrue

\usepackage{latexsym}
%\usepackage{html}

\usepackage{listings}

\usepackage{color}
\definecolor{linkcolor}{rgb}{0, 0, 0.7}


\usepackage[round]{natbib}


\usepackage[%
backref,%
pageanchor=true,%
raiselinks,%
pdfhighlight=/O,%
pagebackref,%
hyperfigures,%
breaklinks,%
colorlinks,%
pdfpagemode=UseNone,%
pdfstartview=FitBH,%
linkcolor={linkcolor},%
anchorcolor={linkcolor},%
citecolor={linkcolor},%
filecolor={linkcolor},%
menucolor={linkcolor},%
urlcolor={linkcolor}%
]{hyperref}

%%% ATTENTION: no bib keys with _ allowed!
\usepackage{underscore}

\usepackage[top=25mm,bottom=25mm,left=25mm,right=25mm]{geometry}

\usepackage{lmodern}

\newcommand{\pkg}[1]{\textbf{#1}}
\newcommand{\proglang}[1]{\textsf{#1}}
\newcommand{\code}[1]{\texttt{#1}}
\newcommand{\file}[1]{\texttt{#1}}

\newcommand{\R}{\mathbb{R} }
\newcommand{\samY}{\mathcal{Y} }
\newcommand{\Prob}{\mathbb{P} }
\newcommand{\N}{\mathbb{N} }
%\newcommand{\C}{\mathbb{C} }
\newcommand{\V}{\mathbb{V}} %% cal{\mbox{\textnormal{Var}}} }
\newcommand{\E}{\mathbb{E}} %%mathcal{\mbox{\textnormal{E}}} }
\newcommand{\Var}{\mathbb{V}} %%mathcal{\mbox{\textnormal{Var}}} }
\newcommand{\argmin}{\operatorname{argmin}\displaylimits}
\newcommand{\argmax}{\operatorname{argmax}\displaylimits}
\newcommand{\LS}{\mathcal{L}_n}
\newcommand{\TS}{\mathcal{T}_n}
\newcommand{\LSc}{\mathcal{L}_{\text{comb},n}}
\newcommand{\LSbc}{\mathcal{L}^*_{\text{comb},n}}
\newcommand{\F}{\mathcal{F}}
\newcommand{\A}{\mathcal{A}}
\newcommand{\yn}{y_{\text{new}}}
\newcommand{\z}{\mathbf{z}}
\newcommand{\X}{\mathbf{X}}
\newcommand{\Z}{\mathbf{Z}}
\newcommand{\Y}{\mathbf{Y}}
\newcommand{\mH}{\mathbf{H}}
\newcommand{\mA}{\mathbf{A}}
\newcommand{\mL}{\mathbf{L}}
\newcommand{\mU}{\mathbf{U}}
\newcommand{\sX}{\mathcal{X}}
\newcommand{\sY}{\mathcal{Y}}
\newcommand{\T}{\mathbf{T}}
\newcommand{\x}{\mathbf{x}}
\renewcommand{\a}{\mathbf{a}}
\newcommand{\xn}{\mathbf{x}_{\text{new}}}
\newcommand{\y}{\mathbf{y}}
\newcommand{\uvec}{\mathbf{u}}
\newcommand{\vvec}{\mathbf{v}}
\newcommand{\w}{\mathbf{w}}
\newcommand{\sbullet}{\mathbin{\vcenter{\hbox{\scalebox{0.5}{$\bullet$}}}}}
\newcommand{\wdot}{\mathbf{w}_{\sbullet}}
\renewcommand{\t}{\mathbf{t}}
\newcommand{\M}{\mathbf{M}}
\renewcommand{\vec}{\text{vec}}
\newcommand{\B}{\mathbf{B}}
\newcommand{\K}{\mathbf{K}}
\newcommand{\W}{\mathbf{W}}
\newcommand{\D}{\mathbf{D}}
\newcommand{\I}{\mathbf{I}}
\newcommand{\bS}{\mathbf{S}}
\newcommand{\cellx}{\pi_n[\x]}
\newcommand{\partn}{\pi_n(\mathcal{L}_n)}
\newcommand{\err}{\text{Err}}
\newcommand{\ea}{\widehat{\text{Err}}^{(a)}}
\newcommand{\ecv}{\widehat{\text{Err}}^{(cv1)}}
\newcommand{\ecvten}{\widehat{\text{Err}}^{(cv10)}}
\newcommand{\eone}{\widehat{\text{Err}}^{(1)}}
\newcommand{\eplus}{\widehat{\text{Err}}^{(.632+)}}
\newcommand{\eoob}{\widehat{\text{Err}}^{(oob)}}
\newcommand{\mub}{\boldsymbol{\mu}}
\newcommand{\Sigmab}{\boldsymbol{\Sigma}}
\def \thetavec        {\text{\boldmath$\theta$}}
\newcommand{\rT}{G}
\newcommand{\rS}{S}
\newcommand{\rt}{g}


<<citation, echo = FALSE>>=
yr <- format(dt <- as.Date(packageDescription("free1way.docreg")$Date), "%Y")
vs <- packageDescription("free1way.docreg")$Version
title <- "Semiparametrically Efficient Population and Permutation Inference in 
       Distribution-free Stratified $K$-sample Oneway Layouts"
DOI <- paste0("10.32614/CRAN.package.", packageDescription("free1way.docreg")$Package)
@


\author{Torsten Hothorn \\ Universit\"at Z\"urich \and
        Kurt Hornik \\ WU Wirtschaftsuniversit\"at Wien}

\title{\Sexpr{title}\footnote{Please cite this document as: Torsten Hothorn and Kurt Hornik
(\Sexpr{yr}), \Sexpr{title}, \proglang{R} package vignette version \Sexpr{vs},
\href{https://doi.org/\Sexpr{DOI}}{DOI:\Sexpr{DOI}}}
}
\date{\Sexpr{format(dt)}}

\begin{document}

\pagenumbering{roman}
\maketitle

\tableofcontents

\begin{abstract}
Starting with \proglang{R} 4.6-0, the \pkg{stats} package provides
infrastructure for distribution-free model-based inference in possibly stratified $K$-sample
oneway layouts via the novel \code{free1way} model function. Treatment
effects to be estimated using \code{free1way} include odds- and hazard
ratios, Lehmann parameters, and a generalised version of Cohen's d for at
least ordered and possibly right-censored outcomes.

In addition to nonparametric maximum-likelhood estimators of treatment effects,
the procedure allows Wald, Rao score, and likelihood ratio tests with
corresponding confidence intervals to be computed. Asymptotic and approximate
Monte-Carlo-based permutation tests and confidence intervals are also
available. In proportional odds models, exact permutation inference is
implemented based on exact permutation distributions derived via the
Streitberg-R\"ohmel algorithm.

Graphical tools for model diagnostics, including model-based confidence
bands for receiver operating characteristic (ROC) curves in
probability-probability plots in the new \code{ppplot} function, 
allow data-driven criticism of model assumptions.

Power assessment and sample-size planning is facilitated either in a
simulation-based way relying on random number generation via 
\code{rfree1way} or based on approximations of the information matrix in
\code{power.free1way.test}, the latter approach being much faster but
slightly less accurate.

The new \code{free1way} function can be understood as a unification and
generalisation of some of the classical ``nonparametric'' test procedures in
\pkg{stats}, including \code{kruskal.test}, \code{wilcox.test},
\code{friedman.test}, \code{mantelhaen.test},
\code{prop.test}, \code{mcnemar.test}, as well as \code{power.prop.test}, allowing the
magnitude of treatment effects to be interpreted on various scales,
providing the possibility to assessment variability by means of confidence intervals and corresponding
tests for these parameters, and offering tools for sample-size planning and
model criticism.

This document explains the technical underpinnings of the implementation.
The \pkg{free1way} package provides this vignette as additional 
documentation and serves as a home for extensive regression tests.
\end{abstract}

\chapter*{Introduction}

Comparing two or more independent samples with respect to some outcome
measure is a common task. Many procedures are available in 
\pkg{stats} and other add-on packages, most of these implementations
making rather strict assumptions regarding the outcome distribution, the
number of samples, the presence of blocks or strata and typically offer
either conditional or unconditional (exact or asymptotic) inference.

This document presents a unified, dense, and yet holistic implementation
covering many classical procedures as special cases. Leveraging 
transformation models, likelihood-based parameter estimation as well as
permutation- and likelihood-based inference are formulated and implemented. One
can understand this contribution as a unification of many of
\code{stats::*.test} procedures, the models available in 
\code{MASS::polr}, \code{rms::orm}, \code{rms::lrm}, \code{survival::coxph}, 
or the \pkg{tram} add-on package (among many others), 
and permutation-based inference in \pkg{coin}.

This implementation is, however, free of any strong dependencies and only
uses functionality available in \proglang{R} itself and the \pkg{stats},
\pkg{graphics}, and \pkg{Matrix} recommended packages.

\chapter{Model and Parameterisation}
\label{ch:model}
\pagenumbering{arabic}

We consider $K$ treatment groups $\rT \in \{1, \dots, K\}, K \ge 2$ for an
at least ordered outcome $Y \in \samY$ observed in
stratum $\rS \in \{1, \dots, B\}$ out of $B \ge 1$ blocks with conditional
cumulative distribution function (cdf)
$F_Y(y \mid \rT = k, \rS = b) = \Prob(Y \le y \mid \rT = k, \rS = b)$. Detecting
and describing differential distributions arising from different treatments
is our main objective. We refer to the first treatment $\rT = 1$ as
``control''.

\paragraph{Model}

With model function $m: [0,1] \times \R \rightarrow [0,1]$, we describe
the conditional distribution under treatment $k$ as a function of the 
conditional distribution under control and a scalar parameter
$\delta_k$:
\begin{eqnarray*}
F(y \mid \rT = k, \rS = b) = m(F(y \mid \rT = 1, \rS = b), \delta_k).
\end{eqnarray*}
The model is assumed to hold for all blocks $b = 1,
\dots, B$, treatments $k = 2, \dots, K$, and outcome values $y \in \samY$ based on parameters
$\delta_2, \dots, \delta_K \in \R$. For notational convenience, we define $\delta_1 := 0$. 

This model formulation gives rise to several specific models, for
example, $m_\text{L}(p, \delta) = p^{\exp(-\delta)}$ (Lehmann alternatives),
$m_\text{PH}(p, \delta) = 1 - (1 -
p)^{\exp(-\delta)}$ (proportional hazards),
$m_\text{PO}(p, \delta) = \text{expit}(\text{logit}(p) - \delta)$ (proportional
odds), or $m_\text{Cd}(p, \delta) =
\Phi(\Phi^{-1}(p) - \delta)$ (generalised Cohen's $d$).

Instead of directly working with $g$, we parameterise the model in terms of
some absolute continuous cdf $F$ with log-concave density $f = F^\prime$
and corresponding derivative $f^\prime$. The location model 
\begin{eqnarray} \label{model}
F_Y(y \mid  \rT = k, \rS = b) = F\left(F^{-1}\left(F_Y(y \mid \rT = 1, \rS = b)\right) - 
                                       \delta_k\right), \quad k = 2, \dots, K
\end{eqnarray}
describes different distributions by means of shift parameter on a latent
scale defined by $F$. The negative shift term ensures that positive values of $\delta_k$ correspond
to the situation of outcomes being stochastically larger in group $k$
compared to control.
The shift parameters are invariant with respect to monotone transformations
of the response values, that is, transforming the observations of all
treatment groups by the same function does not affect the values of
$\delta_k$.

The choice $F(z) = \exp(-\exp(-z))$ gives rise to $m_\text{L}$, 
$F(z) = 1 - \exp(-\exp(z))$ corresponds to $m_\text{PH}$, $F = \text{expit}$
leads to  $m_\text{PO}$, and $F = \Phi$ results in $m_\text{Cd}$. The choice
of $F$ is made a priori and determines the interpretation of $\delta_k$. 

This document describes the implementation of estimators of these shift parameters,
as well as of confidence intervals and formal hypothesis tests for contrasts thereof under
the permutation and population model. Proportional odds models ($m_\text{PO}$)
are explained in-depths by \cite{Harrell2015RMS}, although the models are
presented in terms of survivor, not distribution, functions. 

\paragraph{Hypothesis}

We are interested in inference for $\delta_2, \dots, \delta_K$, in terms of
confidence intervals and hypothesis tests of the form
\begin{eqnarray*}
& & H_0: \delta_k - \mu_k = 0, \text{``two.sided''}, \quad k = 2, \dots, K, \\
& & H_0: \delta_k - \mu_k \ge 0, \text{``less''}, \quad k = K = 2, \\
& & H_0: \delta_k - \mu_k \le 0, \text{``greater''}, \quad k = K = 2,
\end{eqnarray*}
with the latter two options only for the two-sample case ($K = 2$).

\paragraph{Likelihood}

For an ordered categorical outcome $Y$ from sample space $\samY = \{\upsilon_1 <
\upsilon_2 < \cdots < \upsilon_C\}$, we parameterise the model in terms of intercept ($\vartheta_\cdot$) and
shift ($\delta_\cdot$) parameters
\begin{eqnarray*}
F_Y(\upsilon_c \mid \rT = k, \rS = b) = F(\vartheta_{c,b} - \delta_k), \quad c = 1, \dots, C,
\end{eqnarray*}
that is we replace the transformed control outcome $F^{-1}\left(F_Y(\upsilon_c \mid
\rT = 1, \rS = b)\right) =
\vartheta_{c,b}$ with a corresponding intercept parameter.
These $C - 1$ intercept parameters are block-specific and monotone increasing
$\vartheta_{0,b} = -\infty < \vartheta_{1,b} < \cdots < \vartheta_{C,b} = \infty$
within each block $b = 1, \dots, B$.

We collect all model parameters in a vector
\begin{eqnarray*}
\thetavec = (\theta_1 & := & \delta_2, \\
               & \dots & , \\
               \theta_{K - 1} & := & \delta_K, \\
               \theta_{K} & := & \vartheta_{1,1}, \\
               \theta_{K + 1} & := & \vartheta_{2,1} > \vartheta_{1,1}, \\
               &  \dots, & \\
               \theta_{K + C - 2} & := & \vartheta_{C-1,1} > \vartheta_{C-2,1}, \\
               \theta_{K + C - 1} & := & \vartheta_{1,2}, \\
               & \dots &, \\
               \theta_{B (C - 1) + K - 1} & := & \vartheta_{C-1,B} >
               \vartheta_{C-2,B}).
\end{eqnarray*}
If there is no observation for level $c$ in block $b$, the corresponding
parameter is not identified and removed from $\thetavec$.
The parameter space is defined by all parameter vectors $\thetavec$
satisfying the monotonicity of the intercept parameters. Violations always
lead to the log-likelihood function being undefined and thus taking the
value $-\infty$. \cite{Harrell2024} evaluates unconstrained optimisation in
this context and recommends Newton-based algorithms leveraging the
analytically available Hessian (see below).

For the $i$th observation $(y_i = \upsilon_c, \rt_i = k, s_i = b)$ from block $b$
under treatment $k$, the log-likelihood contribution is
\begin{eqnarray*}
\log(\Prob(\upsilon_{c - 1} < Y \le \upsilon_c \mid \rT = k, \rS = b)) = \log(F(\vartheta_{c,b} - \delta_k) - F(\vartheta_{c - 1,b} - \delta_k))
\end{eqnarray*}
with $\upsilon_0 = -\infty$.

For an absolutely continuous outcome $Y \in \R$, we define $\upsilon_c := y_{(c)}$,
the $c$th distinct ordered observation in the sample. The log-likelihood
above is then the empirical or nonparametric log-likelihood.

If observations were independently right-censored, the contribution of the
event $Y > \tilde{y}$ to the log-likelihood is
\begin{eqnarray*}
\log(\Prob(Y > \tilde{y} \mid \rT = k, \rS = b)) = \log(1 - F(\vartheta_{c - 1,b} - \delta_k))
\end{eqnarray*}
where $\upsilon_{c - 1} = \max \{\upsilon \in \samY \mid \upsilon \le \tilde{y}\}$, that is,
observations right-censored between $\upsilon_{c - 1}$ and $\upsilon_c$ correspond to the
parameter $\vartheta_{c - 1,b}$.

Maximising this form of the log-likelihood leads to semiparametrically efficient
estimators \citep[Chapter 15.5][]{vdVaart1998}. In this framework, tests
against deviations from the hypothesis $H_0$ above are locally most
powerful rank tests, for example against proportional odds ($F =
\text{expit})$ or proportional hazards alternatives 
\citep[$F(z) = 1 - \exp(-\exp(z))$,][Example 15.16]{vdVaart1998}.

We represent the data in form of a $C \times K \times B$ contingency table,
whose element $(c, k, b)$ is the number of observations with configuration $(y = y_c, \rt =
k, s = b)$. In the presence of right-censoring, a fourth dimension is added 
($C \times K \times B \times 2)$ whose first $C \times K \times B$ table presents
right-censoring and the second table contains numbers of events.


\chapter{Parameter Estimation}
\label{ch:est}

%%% copy nuweb R code into Sweave chunk
<<localfun, echo = FALSE>>=
Nsim <- 100
options(digits = 5)
.table2list <- function(x) 
{

    
dx <- dim(x)
if (length(dx) == 1L)
    stop("incorrect dimensions")
if (length(dx) == 2L)
    x <- as.table(array(x, dim = c(dx, 1)))
dx <- dim(x)
if (length(dx) < 3L)
    stop("incorrect dimensions")
C <- dim(x)[1L]
K <- dim(x)[2L]
B <- dim(x)[3L]
if (C < 2L)
    stop("at least two response categories required")
if (K < 2L)
    stop("at least two groups required")
xrc <- NULL
if (length(dx) == 4L) {
    if (dx[4] == 2L) {
        xrc <- array(x[,,,"FALSE", drop = TRUE], dim = dx[1:3])
        x <- array(x[,,,"TRUE", drop = TRUE], dim = dx[1:3])
    } else {
        stop(gettextf("%s currently only allows independent right-censoring",
                      "free1way"),
             domain = NA)
    }
}


xlist <- xrclist <- vector(mode = "list", length = B)

for (b in seq_len(B)) {
    xb <- matrix(x[,,b, drop = TRUE], ncol = K)
    xw <- rowSums(abs(xb)) > 0
    if (sum(xw) > 1L) {
        ### do not remove last parameter if there are corresponding
        ### right-censored observations
        wm <- which(xw)[sum(xw)]
        if (!is.null(xrc) && any(xrc[wm:dx[1],,b,drop = TRUE] > 0))
            xw[length(xw)] <- TRUE
        xlist[[b]] <- xb[xw,,drop = FALSE]
        Cidx <- rep.int(1L, times = C)
        Cidx[xw] <- Cidx[xw] + seq_len(sum(xw))
        attr(xlist[[b]], "idx") <- Cidx
        if (!is.null(xrc)) {
            ### count right-censored observations between distinct event
            ### times
            cs <- apply(xrc[,,b,drop = TRUE] * (!xw), 2, function(x) 
                diff(c(0, cumsum(x)[xw])))
            xrclist[[b]] <- matrix(xrc[xw,,b,drop = TRUE], ncol = K) + cs
            idx <- seq_len(C)[xw]
            idx <- rep(seq_len(sum(xw)), times = c(idx[1], diff(idx)))
            Cidx <- rep.int(1L, times = C)
            Cidx[seq_along(idx)] <- Cidx[seq_along(idx)] + idx
            attr(xrclist[[b]], "idx") <- Cidx
        }
    }
}
### remove empty blocks
strata <- !vapply(xlist, is.null, NA)
xlist <- xlist[strata]
xrclist <- xrclist[strata]


    ret <- list(xlist = xlist)
    if (!is.null(xrc))
        ret$xrclist <- xrclist
    ret$strata <- strata
    ret
}

.nll <- function(parm, x, mu = 0, rightcensored = FALSE) 
{

    
    bidx <- seq_len(ncol(x) - 1L)
    delta <- c(0, mu + parm[bidx])
    intercepts <- c(-Inf, parm[- bidx], Inf)
    tmb <- intercepts - matrix(delta, nrow = length(intercepts),  
                                      ncol = ncol(x),
                                      byrow = TRUE)
    Ftmb <- F(tmb)
    if (rightcensored) {
        prb <- 1 - Ftmb[- nrow(Ftmb), , drop = FALSE]
    } else {
        prb <- Ftmb[- 1L, , drop = FALSE] - 
               Ftmb[- nrow(Ftmb), , drop = FALSE]
    } 
    
    if (any(prb < .Machine$double.eps^10)) 
        return(Inf)
    return(- sum(x * log(prb)))
}


.nsc <- function(parm, x, mu = 0, rightcensored = FALSE) 
{

    
    bidx <- seq_len(ncol(x) - 1L)
    delta <- c(0, mu + parm[bidx])
    intercepts <- c(-Inf, parm[- bidx], Inf)
    tmb <- intercepts - matrix(delta, nrow = length(intercepts),  
                                      ncol = ncol(x),
                                      byrow = TRUE)
    Ftmb <- F(tmb)
    if (rightcensored) {
        prb <- 1 - Ftmb[- nrow(Ftmb), , drop = FALSE]
    } else {
        prb <- Ftmb[- 1L, , drop = FALSE] - 
               Ftmb[- nrow(Ftmb), , drop = FALSE]
    } 
    

    ftmb <- f(tmb)
    zu <- x * ftmb[- 1, , drop = FALSE] / prb
    if (rightcensored) zu[] <- 0 ### derivative of a constant
    zl <- x * ftmb[- nrow(ftmb), , drop = FALSE] / prb
    

    ret <- numeric(length(parm))
    ret[bidx] <- .colSums(zl, m = nrow(zl), n = ncol(zl))[-1L] -
                 .colSums(zu[-nrow(zu),,drop = FALSE], 
                          m = nrow(zu) - 1L, n = ncol(zu))[-1L]
    ret[- bidx] <- .rowSums(zu[-nrow(zu),,drop = FALSE] - 
                            zl[-1,,drop = FALSE], 
                            m = nrow(zu) - 1L, n = ncol(zu))
    return(- ret)
}


.nsr <- function(parm, x, mu = 0, rightcensored = FALSE) 
{

    
    bidx <- seq_len(ncol(x) - 1L)
    delta <- c(0, mu + parm[bidx])
    intercepts <- c(-Inf, parm[- bidx], Inf)
    tmb <- intercepts - matrix(delta, nrow = length(intercepts),  
                                      ncol = ncol(x),
                                      byrow = TRUE)
    Ftmb <- F(tmb)
    if (rightcensored) {
        prb <- 1 - Ftmb[- nrow(Ftmb), , drop = FALSE]
    } else {
        prb <- Ftmb[- 1L, , drop = FALSE] - 
               Ftmb[- nrow(Ftmb), , drop = FALSE]
    } 
    

    ftmb <- f(tmb)
    zu <- x * ftmb[- 1, , drop = FALSE] / prb
    if (rightcensored) zu[] <- 0 ### derivative of a constant
    zl <- x * ftmb[- nrow(ftmb), , drop = FALSE] / prb
    

    ret <- .rowSums(zl - zu, m = nrow(zl), n = ncol(zl)) / 
           .rowSums(x, m = nrow(x), n = ncol(x))
    ret[!is.finite(ret)] <- 0
    return(- ret)
}


.hes <- function(parm, x, mu = 0, rightcensored = FALSE, full = FALSE) 
{

    
    bidx <- seq_len(ncol(x) - 1L)
    delta <- c(0, mu + parm[bidx])
    intercepts <- c(-Inf, parm[- bidx], Inf)
    tmb <- intercepts - matrix(delta, nrow = length(intercepts),  
                                      ncol = ncol(x),
                                      byrow = TRUE)
    Ftmb <- F(tmb)
    if (rightcensored) {
        prb <- 1 - Ftmb[- nrow(Ftmb), , drop = FALSE]
    } else {
        prb <- Ftmb[- 1L, , drop = FALSE] - 
               Ftmb[- nrow(Ftmb), , drop = FALSE]
    } 
    

    ftmb <- f(tmb)
    fptmb <- fp(tmb)

    dl <- ftmb[- nrow(ftmb), , drop = FALSE]
    du <- ftmb[- 1, , drop = FALSE]
    if (rightcensored) du[] <- 0
    dpl <- fptmb[- nrow(ftmb), , drop = FALSE]
    dpu <- fptmb[- 1, , drop = FALSE]
    if (rightcensored) dpu[] <- 0
    dlm1 <- dl[,-1L, drop = FALSE]
    dum1 <- du[,-1L, drop = FALSE]
    dplm1 <- dpl[,-1L, drop = FALSE]
    dpum1 <- dpu[,-1L, drop = FALSE]
    prbm1 <- prb[,-1L, drop = FALSE]

    i1 <- length(intercepts) - 1L
    i2 <- 1L
    

    Aoffdiag <- - .rowSums(x * du * dl / prb^2, m = nrow(x), n = ncol(x))[-i2]
    Aoffdiag <- Aoffdiag[-length(Aoffdiag)]
    
    
    Adiag <- - .rowSums((x * dpu / prb)[-i1,,drop = FALSE] - 
                        (x * dpl / prb)[-i2,,drop = FALSE] - 
                        ((x * du^2 / prb^2)[-i1,,drop = FALSE] + 
                         (x * dl^2 / prb^2)[-i2,,drop = FALSE] ), 
                        m = nrow(x) - length(i1), n = ncol(x)
                       )
                      
    
    xm1 <- x[,-1L,drop = FALSE] 
    X <- ((xm1 * dpum1 / prbm1)[-i1,,drop = FALSE] - 
          (xm1 * dplm1 / prbm1)[-i2,,drop = FALSE] - 
          ((xm1 * dum1^2 / prbm1^2)[-i1,,drop = FALSE] - 
           (xm1 * dum1 * dlm1 / prbm1^2)[-i2,,drop = FALSE] -
           (xm1 * dum1 * dlm1 / prbm1^2)[-i1,,drop = FALSE] +
           (xm1 * dlm1^2 / prbm1^2)[-i2,,drop = FALSE]
          )
         )

    Z <- - .colSums(xm1 * (dpum1 / prbm1 - 
                           dplm1 / prbm1 -
                           (dum1^2 / prbm1^2 - 
                            2 * dum1 * dlm1 / prbm1^2 +
                            dlm1^2 / prbm1^2
                           )
                          ),
                    m = nrow(xm1), n = ncol(xm1)
                    )
    if (length(Z) > 1L) Z <- diag(Z)
    

    if (length(Adiag) > 1L) {
        if (!isFALSE(full)) {
            A <- list(Adiag = Adiag, Aoffdiag = Aoffdiag)
        } else {
            A <- Matrix::bandSparse(length(Adiag), 
                k = 0:1, diagonals = list(Adiag, Aoffdiag), 
                symmetric = TRUE)
        }
    } else {
        if (!isFALSE(full)) {
            A <- list(Adiag = Adiag, Aoffdiag = NULL)
        } else {
            A <- matrix(Adiag)
        }
    }
    return(list(A = A, X = X, Z = Z))
}


.snll <- function(parm, x, mu = 0, rightcensored = FALSE) 
{

    
    C <- vapply(x, NROW, 0L) ### might differ by stratum
    K <- unique(do.call("c", lapply(x, ncol))) ### the same
    B <- length(x)
    sidx <- factor(rep(seq_len(B), times = pmax(0, C - 1L)), 
                   levels = seq_len(B))
    bidx <- seq_len(K - 1L)
    delta <- parm[bidx]
    intercepts <- split(parm[-bidx], sidx)
    

    ret <- 0
    for (b in seq_len(B))
        ret <- ret + .nll(c(delta, intercepts[[b]]), x[[b]], mu = mu,
                          rightcensored = rightcensored)
    return(ret)
}


.snsc <- function(parm, x, mu = 0, rightcensored = FALSE) 
{

    
    C <- vapply(x, NROW, 0L) ### might differ by stratum
    K <- unique(do.call("c", lapply(x, ncol))) ### the same
    B <- length(x)
    sidx <- factor(rep(seq_len(B), times = pmax(0, C - 1L)), 
                   levels = seq_len(B))
    bidx <- seq_len(K - 1L)
    delta <- parm[bidx]
    intercepts <- split(parm[-bidx], sidx)
    

    ret <- numeric(length(bidx))
    for (b in seq_len(B)) {
        nsc <- .nsc(c(delta, intercepts[[b]]), x[[b]], mu = mu,
                    rightcensored = rightcensored)
        ret[bidx] <- ret[bidx] + nsc[bidx]
        ret <- c(ret, nsc[-bidx])
    }
    return(ret)
}


.shes <- function(parm, x, mu = 0, xrc = NULL, full = FALSE, 
                  retMatrix = FALSE) 
{

    
    C <- vapply(x, NROW, 0L) ### might differ by stratum
    K <- unique(do.call("c", lapply(x, ncol))) ### the same
    B <- length(x)
    sidx <- factor(rep(seq_len(B), times = pmax(0, C - 1L)), 
                   levels = seq_len(B))
    bidx <- seq_len(K - 1L)
    delta <- parm[bidx]
    intercepts <- split(parm[-bidx], sidx)
    

    if (!isFALSE(ret <- full)) {
        
        for (b in seq_len(B)) {
            H <- .hes(c(delta, intercepts[[b]]), x[[b]], mu = mu, full = full)
            if (!is.null(xrc)) {
                Hrc <- .hes(c(delta, intercepts[[b]]), xrc[[b]], mu = mu, 
                            rightcensored = TRUE, full = full)
                H$X <- H$X + Hrc$X
                H$A$Adiag <- H$A$Adiag + Hrc$A$Adiag
                H$A$Aoffdiag <- H$A$Aoffdiag + Hrc$A$Aoffdiag
                H$Z <- H$Z + Hrc$Z
            }
            if (b == 1L) {
                Adiag <- H$A$Adiag
                Aoffdiag <- H$A$Aoffdiag
                X <- H$X
                Z <- H$Z
            } else {
                Adiag <- c(Adiag, H$A$Adiag)
                Aoffdiag <- c(Aoffdiag, 0, H$A$Aoffdiag)
                X <- rbind(X, H$X)
                Z <- Z + H$Z
            }
        }

        if (length(Adiag) > 1L) {
            A <- Matrix::bandSparse(length(Adiag),
                                    k = 0:1, diagonals = list(Adiag, Aoffdiag),
                                    symmetric = TRUE)
        } else {
            A <- matrix(Adiag)
        }

        ret <- cbind(Z, t(X))
        ret <- rbind(ret, cbind(X, A))
        if (retMatrix) return(ret)
        return(as.matrix(ret))
        
    }
    ret <- matrix(0, nrow = length(bidx), ncol = length(bidx))
    for (b in seq_len(B)) {
        H <- .hes(c(delta, intercepts[[b]]), x[[b]], mu = mu)
        if (!is.null(xrc)) {
            Hrc <- .hes(c(delta, intercepts[[b]]), xrc[[b]], mu = mu, 
                        rightcensored = TRUE)
            H$X <- H$X + Hrc$X
            H$A <- H$A + Hrc$A
            H$Z <- H$Z + Hrc$Z
        }
        sAH <- tryCatch(Matrix::solve(H$A, H$X), error = function(e) NULL)
        if (is.null(sAH))
            stop(gettextf("error computing the Hessian in %s",
                          "free1way"),
                 domain = NA)
        ret <- ret + (H$Z - crossprod(H$X, sAH))
    }
    as.matrix(ret)
}


.snsr <- function(parm, x, mu = 0, rightcensored = FALSE) 
{

    
    C <- vapply(x, NROW, 0L) ### might differ by stratum
    K <- unique(do.call("c", lapply(x, ncol))) ### the same
    B <- length(x)
    sidx <- factor(rep(seq_len(B), times = pmax(0, C - 1L)), 
                   levels = seq_len(B))
    bidx <- seq_len(K - 1L)
    delta <- parm[bidx]
    intercepts <- split(parm[-bidx], sidx)
    

    ret <- c()
    for (b in seq_len(B)) {
        idx <- attr(x[[b]], "idx")
        ### idx == 1L means zero residual, see definition of idx
        sr <- c(0, .nsr(c(delta, intercepts[[b]]), x[[b]], mu = mu,
                        rightcensored = rightcensored))
        ret <- c(ret, sr[idx])
    }
    return(ret)
}


.free1wayML <- function(x, link, mu = 0, start = NULL, fix = NULL, 
                        residuals = TRUE, score = TRUE, hessian = TRUE, 
                        MPL_Jeffreys = FALSE,
                        ### use nlminb for small sample sizes
                        dooptim = c(".NewtonRaphson", "nlminb")[1 + (sum(x) < 20)],                         
                        control = list(
                            "nlminb" = list(trace = trace, iter.max = 200,
                                            eval.max = 200, rel.tol = 1e-10,
                                            abs.tol = 1e-20, xf.tol = 1e-16),
                            ".NewtonRaphson" = list(iter.max = 200, trace = trace, 
                                             objtol = 5e-4, 
                                             gradtol = 1e-5 * sum(x) / 1000, 
                                             paramtol = 1e-5, minstepsize = 1e-2, 
                                             tolsolve = .Machine$double.eps)
                        )[dooptim],
                        trace = FALSE, 
                        tol = sqrt(.Machine$double.eps), ...) 
{

    ### convert to three-way table
    xt <- x
    if (!is.table(x))
        stop(gettextf("invalid argument '%s'", "x"), domain = NA) # 'y' in free1way ...
    dx <- dim(x)
    dn <- dimnames(x)
    if (length(dx) == 2L) {
        x <- as.table(array(c(x), dim = dx <- c(dx, 1L)))
        dimnames(x) <- dn <- c(dn, list(A = "A"))
    }

    ### short-cuts for link functions
    F <- function(q) .p(link, q = q)
    Q <- function(p) .q(link, p = p)
    f <- function(q) .d(link, x = q)
    fp <- function(q) .dd(link, x = q)

    if(!suppressPackageStartupMessages(requireNamespace("Matrix")))
        stop(gettextf("%s needs package 'Matrix' correctly installed",
                      ".free1wayML"),
                 domain = NA)

    
    dx <- dim(x)
    if (length(dx) == 1L)
        stop("incorrect dimensions")
    if (length(dx) == 2L)
        x <- as.table(array(x, dim = c(dx, 1)))
    dx <- dim(x)
    if (length(dx) < 3L)
        stop("incorrect dimensions")
    C <- dim(x)[1L]
    K <- dim(x)[2L]
    B <- dim(x)[3L]
    if (C < 2L)
        stop("at least two response categories required")
    if (K < 2L)
        stop("at least two groups required")
    xrc <- NULL
    if (length(dx) == 4L) {
        if (dx[4] == 2L) {
            xrc <- array(x[,,,"FALSE", drop = TRUE], dim = dx[1:3])
            x <- array(x[,,,"TRUE", drop = TRUE], dim = dx[1:3])
        } else {
            stop(gettextf("%s currently only allows independent right-censoring",
                          "free1way"),
                 domain = NA)
        }
    }


    xlist <- xrclist <- vector(mode = "list", length = B)

    for (b in seq_len(B)) {
        xb <- matrix(x[,,b, drop = TRUE], ncol = K)
        xw <- rowSums(abs(xb)) > 0
        if (sum(xw) > 1L) {
            ### do not remove last parameter if there are corresponding
            ### right-censored observations
            wm <- which(xw)[sum(xw)]
            if (!is.null(xrc) && any(xrc[wm:dx[1],,b,drop = TRUE] > 0))
                xw[length(xw)] <- TRUE
            xlist[[b]] <- xb[xw,,drop = FALSE]
            Cidx <- rep.int(1L, times = C)
            Cidx[xw] <- Cidx[xw] + seq_len(sum(xw))
            attr(xlist[[b]], "idx") <- Cidx
            if (!is.null(xrc)) {
                ### count right-censored observations between distinct event
                ### times
                cs <- apply(xrc[,,b,drop = TRUE] * (!xw), 2, function(x) 
                    diff(c(0, cumsum(x)[xw])))
                xrclist[[b]] <- matrix(xrc[xw,,b,drop = TRUE], ncol = K) + cs
                idx <- seq_len(C)[xw]
                idx <- rep(seq_len(sum(xw)), times = c(idx[1], diff(idx)))
                Cidx <- rep.int(1L, times = C)
                Cidx[seq_along(idx)] <- Cidx[seq_along(idx)] + idx
                attr(xrclist[[b]], "idx") <- Cidx
            }
        }
    }
    ### remove empty blocks
    strata <- !vapply(xlist, is.null, NA)
    xlist <- xlist[strata]
    xrclist <- xrclist[strata]
    
    
    ## allow specification of start = delta and fix = 1:K
    ## for evaluating the likelihood at given delta parameters
    ## without having to specify all intercept parameters
    if (is.null(start))
        start <- rep.int(0, K - 1L)
    NS <- length(start) == (K - 1L)
    lwr <- rep(-Inf, times = K - 1L)
    for (b in seq_len(length(xlist))) {
        bC <- nrow(xlist[[b]]) - 1L
        lwr <- c(lwr, -Inf, rep.int(0, times = bC - 1L))
        if (NS) {
            ecdf0 <- cumsum(rowSums(xlist[[b]]))
            ### ensure that 0 < ecdf0 < 1 such that quantiles exist
            ecdf0 <- pmax(1, ecdf0[-length(ecdf0)]) / (max(ecdf0) + 1)
            Qecdf <- Q(ecdf0)
            start <- c(start, Qecdf)
        }
    }
    
    
    .nll <- function(parm, x, mu = 0, rightcensored = FALSE) 
    {

        
        bidx <- seq_len(ncol(x) - 1L)
        delta <- c(0, mu + parm[bidx])
        intercepts <- c(-Inf, parm[- bidx], Inf)
        tmb <- intercepts - matrix(delta, nrow = length(intercepts),  
                                          ncol = ncol(x),
                                          byrow = TRUE)
        Ftmb <- F(tmb)
        if (rightcensored) {
            prb <- 1 - Ftmb[- nrow(Ftmb), , drop = FALSE]
        } else {
            prb <- Ftmb[- 1L, , drop = FALSE] - 
                   Ftmb[- nrow(Ftmb), , drop = FALSE]
        } 
        
        if (any(prb < .Machine$double.eps^10)) 
            return(Inf)
        return(- sum(x * log(prb)))
    }
    
    
    .nsc <- function(parm, x, mu = 0, rightcensored = FALSE) 
    {

        
        bidx <- seq_len(ncol(x) - 1L)
        delta <- c(0, mu + parm[bidx])
        intercepts <- c(-Inf, parm[- bidx], Inf)
        tmb <- intercepts - matrix(delta, nrow = length(intercepts),  
                                          ncol = ncol(x),
                                          byrow = TRUE)
        Ftmb <- F(tmb)
        if (rightcensored) {
            prb <- 1 - Ftmb[- nrow(Ftmb), , drop = FALSE]
        } else {
            prb <- Ftmb[- 1L, , drop = FALSE] - 
                   Ftmb[- nrow(Ftmb), , drop = FALSE]
        } 
        

        ftmb <- f(tmb)
        zu <- x * ftmb[- 1, , drop = FALSE] / prb
        if (rightcensored) zu[] <- 0 ### derivative of a constant
        zl <- x * ftmb[- nrow(ftmb), , drop = FALSE] / prb
        

        ret <- numeric(length(parm))
        ret[bidx] <- .colSums(zl, m = nrow(zl), n = ncol(zl))[-1L] -
                     .colSums(zu[-nrow(zu),,drop = FALSE], 
                              m = nrow(zu) - 1L, n = ncol(zu))[-1L]
        ret[- bidx] <- .rowSums(zu[-nrow(zu),,drop = FALSE] - 
                                zl[-1,,drop = FALSE], 
                                m = nrow(zu) - 1L, n = ncol(zu))
        return(- ret)
    }
    
    
    .nsr <- function(parm, x, mu = 0, rightcensored = FALSE) 
    {

        
        bidx <- seq_len(ncol(x) - 1L)
        delta <- c(0, mu + parm[bidx])
        intercepts <- c(-Inf, parm[- bidx], Inf)
        tmb <- intercepts - matrix(delta, nrow = length(intercepts),  
                                          ncol = ncol(x),
                                          byrow = TRUE)
        Ftmb <- F(tmb)
        if (rightcensored) {
            prb <- 1 - Ftmb[- nrow(Ftmb), , drop = FALSE]
        } else {
            prb <- Ftmb[- 1L, , drop = FALSE] - 
                   Ftmb[- nrow(Ftmb), , drop = FALSE]
        } 
        

        ftmb <- f(tmb)
        zu <- x * ftmb[- 1, , drop = FALSE] / prb
        if (rightcensored) zu[] <- 0 ### derivative of a constant
        zl <- x * ftmb[- nrow(ftmb), , drop = FALSE] / prb
        

        ret <- .rowSums(zl - zu, m = nrow(zl), n = ncol(zl)) / 
               .rowSums(x, m = nrow(x), n = ncol(x))
        ret[!is.finite(ret)] <- 0
        return(- ret)
    }
    
    
    .hes <- function(parm, x, mu = 0, rightcensored = FALSE, full = FALSE) 
    {

        
        bidx <- seq_len(ncol(x) - 1L)
        delta <- c(0, mu + parm[bidx])
        intercepts <- c(-Inf, parm[- bidx], Inf)
        tmb <- intercepts - matrix(delta, nrow = length(intercepts),  
                                          ncol = ncol(x),
                                          byrow = TRUE)
        Ftmb <- F(tmb)
        if (rightcensored) {
            prb <- 1 - Ftmb[- nrow(Ftmb), , drop = FALSE]
        } else {
            prb <- Ftmb[- 1L, , drop = FALSE] - 
                   Ftmb[- nrow(Ftmb), , drop = FALSE]
        } 
        

        ftmb <- f(tmb)
        fptmb <- fp(tmb)

        dl <- ftmb[- nrow(ftmb), , drop = FALSE]
        du <- ftmb[- 1, , drop = FALSE]
        if (rightcensored) du[] <- 0
        dpl <- fptmb[- nrow(ftmb), , drop = FALSE]
        dpu <- fptmb[- 1, , drop = FALSE]
        if (rightcensored) dpu[] <- 0
        dlm1 <- dl[,-1L, drop = FALSE]
        dum1 <- du[,-1L, drop = FALSE]
        dplm1 <- dpl[,-1L, drop = FALSE]
        dpum1 <- dpu[,-1L, drop = FALSE]
        prbm1 <- prb[,-1L, drop = FALSE]

        i1 <- length(intercepts) - 1L
        i2 <- 1L
        

        Aoffdiag <- - .rowSums(x * du * dl / prb^2, m = nrow(x), n = ncol(x))[-i2]
        Aoffdiag <- Aoffdiag[-length(Aoffdiag)]
        
        
        Adiag <- - .rowSums((x * dpu / prb)[-i1,,drop = FALSE] - 
                            (x * dpl / prb)[-i2,,drop = FALSE] - 
                            ((x * du^2 / prb^2)[-i1,,drop = FALSE] + 
                             (x * dl^2 / prb^2)[-i2,,drop = FALSE] ), 
                            m = nrow(x) - length(i1), n = ncol(x)
                           )
                          
        
        xm1 <- x[,-1L,drop = FALSE] 
        X <- ((xm1 * dpum1 / prbm1)[-i1,,drop = FALSE] - 
              (xm1 * dplm1 / prbm1)[-i2,,drop = FALSE] - 
              ((xm1 * dum1^2 / prbm1^2)[-i1,,drop = FALSE] - 
               (xm1 * dum1 * dlm1 / prbm1^2)[-i2,,drop = FALSE] -
               (xm1 * dum1 * dlm1 / prbm1^2)[-i1,,drop = FALSE] +
               (xm1 * dlm1^2 / prbm1^2)[-i2,,drop = FALSE]
              )
             )

        Z <- - .colSums(xm1 * (dpum1 / prbm1 - 
                               dplm1 / prbm1 -
                               (dum1^2 / prbm1^2 - 
                                2 * dum1 * dlm1 / prbm1^2 +
                                dlm1^2 / prbm1^2
                               )
                              ),
                        m = nrow(xm1), n = ncol(xm1)
                        )
        if (length(Z) > 1L) Z <- diag(Z)
        

        if (length(Adiag) > 1L) {
            if (!isFALSE(full)) {
                A <- list(Adiag = Adiag, Aoffdiag = Aoffdiag)
            } else {
                A <- Matrix::bandSparse(length(Adiag), 
                    k = 0:1, diagonals = list(Adiag, Aoffdiag), 
                    symmetric = TRUE)
            }
        } else {
            if (!isFALSE(full)) {
                A <- list(Adiag = Adiag, Aoffdiag = NULL)
            } else {
                A <- matrix(Adiag)
            }
        }
        return(list(A = A, X = X, Z = Z))
    }
    
    
    .snll <- function(parm, x, mu = 0, rightcensored = FALSE) 
    {

        
        C <- vapply(x, NROW, 0L) ### might differ by stratum
        K <- unique(do.call("c", lapply(x, ncol))) ### the same
        B <- length(x)
        sidx <- factor(rep(seq_len(B), times = pmax(0, C - 1L)), 
                       levels = seq_len(B))
        bidx <- seq_len(K - 1L)
        delta <- parm[bidx]
        intercepts <- split(parm[-bidx], sidx)
        

        ret <- 0
        for (b in seq_len(B))
            ret <- ret + .nll(c(delta, intercepts[[b]]), x[[b]], mu = mu,
                              rightcensored = rightcensored)
        return(ret)
    }
    
    
    .snsc <- function(parm, x, mu = 0, rightcensored = FALSE) 
    {

        
        C <- vapply(x, NROW, 0L) ### might differ by stratum
        K <- unique(do.call("c", lapply(x, ncol))) ### the same
        B <- length(x)
        sidx <- factor(rep(seq_len(B), times = pmax(0, C - 1L)), 
                       levels = seq_len(B))
        bidx <- seq_len(K - 1L)
        delta <- parm[bidx]
        intercepts <- split(parm[-bidx], sidx)
        

        ret <- numeric(length(bidx))
        for (b in seq_len(B)) {
            nsc <- .nsc(c(delta, intercepts[[b]]), x[[b]], mu = mu,
                        rightcensored = rightcensored)
            ret[bidx] <- ret[bidx] + nsc[bidx]
            ret <- c(ret, nsc[-bidx])
        }
        return(ret)
    }
    
    
    .shes <- function(parm, x, mu = 0, xrc = NULL, full = FALSE, 
                      retMatrix = FALSE) 
    {

        
        C <- vapply(x, NROW, 0L) ### might differ by stratum
        K <- unique(do.call("c", lapply(x, ncol))) ### the same
        B <- length(x)
        sidx <- factor(rep(seq_len(B), times = pmax(0, C - 1L)), 
                       levels = seq_len(B))
        bidx <- seq_len(K - 1L)
        delta <- parm[bidx]
        intercepts <- split(parm[-bidx], sidx)
        

        if (!isFALSE(ret <- full)) {
            
            for (b in seq_len(B)) {
                H <- .hes(c(delta, intercepts[[b]]), x[[b]], mu = mu, full = full)
                if (!is.null(xrc)) {
                    Hrc <- .hes(c(delta, intercepts[[b]]), xrc[[b]], mu = mu, 
                                rightcensored = TRUE, full = full)
                    H$X <- H$X + Hrc$X
                    H$A$Adiag <- H$A$Adiag + Hrc$A$Adiag
                    H$A$Aoffdiag <- H$A$Aoffdiag + Hrc$A$Aoffdiag
                    H$Z <- H$Z + Hrc$Z
                }
                if (b == 1L) {
                    Adiag <- H$A$Adiag
                    Aoffdiag <- H$A$Aoffdiag
                    X <- H$X
                    Z <- H$Z
                } else {
                    Adiag <- c(Adiag, H$A$Adiag)
                    Aoffdiag <- c(Aoffdiag, 0, H$A$Aoffdiag)
                    X <- rbind(X, H$X)
                    Z <- Z + H$Z
                }
            }

            if (length(Adiag) > 1L) {
                A <- Matrix::bandSparse(length(Adiag),
                                        k = 0:1, diagonals = list(Adiag, Aoffdiag),
                                        symmetric = TRUE)
            } else {
                A <- matrix(Adiag)
            }

            ret <- cbind(Z, t(X))
            ret <- rbind(ret, cbind(X, A))
            if (retMatrix) return(ret)
            return(as.matrix(ret))
            
        }
        ret <- matrix(0, nrow = length(bidx), ncol = length(bidx))
        for (b in seq_len(B)) {
            H <- .hes(c(delta, intercepts[[b]]), x[[b]], mu = mu)
            if (!is.null(xrc)) {
                Hrc <- .hes(c(delta, intercepts[[b]]), xrc[[b]], mu = mu, 
                            rightcensored = TRUE)
                H$X <- H$X + Hrc$X
                H$A <- H$A + Hrc$A
                H$Z <- H$Z + Hrc$Z
            }
            sAH <- tryCatch(Matrix::solve(H$A, H$X), error = function(e) NULL)
            if (is.null(sAH))
                stop(gettextf("error computing the Hessian in %s",
                              "free1way"),
                     domain = NA)
            ret <- ret + (H$Z - crossprod(H$X, sAH))
        }
        as.matrix(ret)
    }
    
    
    .snsr <- function(parm, x, mu = 0, rightcensored = FALSE) 
    {

        
        C <- vapply(x, NROW, 0L) ### might differ by stratum
        K <- unique(do.call("c", lapply(x, ncol))) ### the same
        B <- length(x)
        sidx <- factor(rep(seq_len(B), times = pmax(0, C - 1L)), 
                       levels = seq_len(B))
        bidx <- seq_len(K - 1L)
        delta <- parm[bidx]
        intercepts <- split(parm[-bidx], sidx)
        

        ret <- c()
        for (b in seq_len(B)) {
            idx <- attr(x[[b]], "idx")
            ### idx == 1L means zero residual, see definition of idx
            sr <- c(0, .nsr(c(delta, intercepts[[b]]), x[[b]], mu = mu,
                            rightcensored = rightcensored))
            ret <- c(ret, sr[idx])
        }
        return(ret)
    }
    
    
    fn <- function(par) 
    {
        ret <- .snll(par, x = xlist, mu = mu)
        if (!is.null(xrc))
            ret <- ret + .snll(par, x = xrclist, mu = mu, 
                               rightcensored = TRUE)
        return(ret)
    }
    gr <- function(par) 
    {
        ret <- .snsc(par, x = xlist, mu = mu)
        if (!is.null(xrc))
            ret <- ret + .snsc(par, x = xrclist, mu = mu, 
                               rightcensored = TRUE)
        return(ret)
    }

    ### allocate memory for hessian
    Hess <- Matrix::Matrix(0, nrow = length(start), ncol = length(start))

    he <- function(par) 
    {
        if (!is.null(xrc)) {
            ret <- .shes(par, x = xlist, mu = mu, xrc = xrclist, full = Hess, 
                         retMatrix = names(control)[1L] == ".NewtonRaphson")
        } else {
            ret <- .shes(par, x = xlist, mu = mu, full = Hess, 
                         retMatrix = names(control)[1L] == ".NewtonRaphson")
        }
        return(ret)
    }
    

    .profile <- function(start, fix = seq_len(K - 1)) 
    {
        if (!all(fix %in% seq_len(K - 1)))
            stop(gettextf("invalid argument '%s'", "fix"), domain = NA)
        delta <- start[fix]
        opargs <- list(start = start[-fix], 
                         objective = function(par) {
                             p <- numeric(length(par) + length(fix))
                             p[fix] <- delta
                             p[-fix] <- par
                             fn(p)
                         },
                         gradient = function(par) {
                             p <- numeric(length(par) + length(fix))
                             p[fix] <- delta
                             p[-fix] <- par
                             gr(p)[-fix]
                         },
                         hessian = function(par) {
                             p <- numeric(length(par) + length(fix))
                             p[fix] <- delta
                             p[-fix] <- par
                             he(p)[-fix, -fix, drop = FALSE]
                         })
        opargs$control <- control[[1L]]
        MPL_Jeffreys <- FALSE ### turn off Jeffreys penalisation in .profile

        
        maxit <- control[[1L]]$iter.max
        while(maxit < 10001) {
           ret <- do.call(names(control)[[1L]], opargs)
           maxit <- 5 * maxit
           if (ret$convergence > 0) {
               opargs$control$eval.max <- maxit
               opargs$control$iter.max <- maxit
               opargs$start <- ret$par
           } else {
               break()
           }
        }

        if (isTRUE(MPL_Jeffreys)) {
            
            .pll_Jeffreys <- function(cf, start) 
            {
                fix <- seq_along(cf)
                start[fix] <- cf
                ### compute profile likelihood w/o warnings
                ret <- suppressWarnings(.profile(start, fix = fix))
                Hfull <- he(ret$par)
                Hfix <- as.matrix(solve(solve(Hfull)[fix, fix]))
                return(ret$value - 
                       .5 * determinant(Hfix, logarithm = TRUE)$modulus)
            }
            if (K == 2) {
                MLcf <- ret$par[seq_len(K - 1)]
                Fret <- optim(MLcf, fn = .pll_Jeffreys, start = ret$par,
                              method = "Brent", lower = MLcf - 5, 
                              upper = MLcf + 5)
            } else {
                ### Nelder-Mead
                Fret <- optim(ret$par[seq_len(K - 1)], fn = .pll_Jeffreys, 
                              start = ret$par)
            }
            if (Fret$convergence == 0) {
                start <- ret$par
                start[seq_len(K - 1)] <- Fret$par
                ret <- .profile(start, fix = seq_len(K - 1))
                ret$objective <- ret$value
            }
            
        } else {
            if (ret$convergence > 0) {
                if (is.na(MPL_Jeffreys)) { ### only after failure
                    warning(gettextf("Jeffreys penalisation was applied in %s because initial optimisation failed with:",
                                     "free1way"),
                            "\n  ", ret$message, domain = NA)
                    MPL_Jeffreys <- TRUE
                    
                    .pll_Jeffreys <- function(cf, start) 
                    {
                        fix <- seq_along(cf)
                        start[fix] <- cf
                        ### compute profile likelihood w/o warnings
                        ret <- suppressWarnings(.profile(start, fix = fix))
                        Hfull <- he(ret$par)
                        Hfix <- as.matrix(solve(solve(Hfull)[fix, fix]))
                        return(ret$value - 
                               .5 * determinant(Hfix, logarithm = TRUE)$modulus)
                    }
                    if (K == 2) {
                        MLcf <- ret$par[seq_len(K - 1)]
                        Fret <- optim(MLcf, fn = .pll_Jeffreys, start = ret$par,
                                      method = "Brent", lower = MLcf - 5, 
                                      upper = MLcf + 5)
                    } else {
                        ### Nelder-Mead
                        Fret <- optim(ret$par[seq_len(K - 1)], fn = .pll_Jeffreys, 
                                      start = ret$par)
                    }
                    if (Fret$convergence == 0) {
                        start <- ret$par
                        start[seq_len(K - 1)] <- Fret$par
                        ret <- .profile(start, fix = seq_len(K - 1))
                        ret$objective <- ret$value
                    }
                    
                }
           }
        }
        if (ret$convergence > 0)
            warning(gettextf("unsuccessful optimisation in %s", "free1way"),
                    ": ", ret$message, domain = NA)

        ret$MPL_Jeffreys <- MPL_Jeffreys
        ret$value <- ret$objective
        ret$objective <- NULL
        

        p <- numeric(length(start))
        p[fix] <- delta
        p[-fix] <- ret$par
        ret$par <- p
        ret
    }
    
    
    if (!length(fix)) {
        opargs <- list(start = start, 
                       objective = fn, 
                       gradient = gr,
                       hessian = he)
        opargs$control <- control[[1L]]
        
        maxit <- control[[1L]]$iter.max
        while(maxit < 10001) {
           ret <- do.call(names(control)[[1L]], opargs)
           maxit <- 5 * maxit
           if (ret$convergence > 0) {
               opargs$control$eval.max <- maxit
               opargs$control$iter.max <- maxit
               opargs$start <- ret$par
           } else {
               break()
           }
        }

        if (isTRUE(MPL_Jeffreys)) {
            
            .pll_Jeffreys <- function(cf, start) 
            {
                fix <- seq_along(cf)
                start[fix] <- cf
                ### compute profile likelihood w/o warnings
                ret <- suppressWarnings(.profile(start, fix = fix))
                Hfull <- he(ret$par)
                Hfix <- as.matrix(solve(solve(Hfull)[fix, fix]))
                return(ret$value - 
                       .5 * determinant(Hfix, logarithm = TRUE)$modulus)
            }
            if (K == 2) {
                MLcf <- ret$par[seq_len(K - 1)]
                Fret <- optim(MLcf, fn = .pll_Jeffreys, start = ret$par,
                              method = "Brent", lower = MLcf - 5, 
                              upper = MLcf + 5)
            } else {
                ### Nelder-Mead
                Fret <- optim(ret$par[seq_len(K - 1)], fn = .pll_Jeffreys, 
                              start = ret$par)
            }
            if (Fret$convergence == 0) {
                start <- ret$par
                start[seq_len(K - 1)] <- Fret$par
                ret <- .profile(start, fix = seq_len(K - 1))
                ret$objective <- ret$value
            }
            
        } else {
            if (ret$convergence > 0) {
                if (is.na(MPL_Jeffreys)) { ### only after failure
                    warning(gettextf("Jeffreys penalisation was applied in %s because initial optimisation failed with:",
                                     "free1way"),
                            "\n  ", ret$message, domain = NA)
                    MPL_Jeffreys <- TRUE
                    
                    .pll_Jeffreys <- function(cf, start) 
                    {
                        fix <- seq_along(cf)
                        start[fix] <- cf
                        ### compute profile likelihood w/o warnings
                        ret <- suppressWarnings(.profile(start, fix = fix))
                        Hfull <- he(ret$par)
                        Hfix <- as.matrix(solve(solve(Hfull)[fix, fix]))
                        return(ret$value - 
                               .5 * determinant(Hfix, logarithm = TRUE)$modulus)
                    }
                    if (K == 2) {
                        MLcf <- ret$par[seq_len(K - 1)]
                        Fret <- optim(MLcf, fn = .pll_Jeffreys, start = ret$par,
                                      method = "Brent", lower = MLcf - 5, 
                                      upper = MLcf + 5)
                    } else {
                        ### Nelder-Mead
                        Fret <- optim(ret$par[seq_len(K - 1)], fn = .pll_Jeffreys, 
                                      start = ret$par)
                    }
                    if (Fret$convergence == 0) {
                        start <- ret$par
                        start[seq_len(K - 1)] <- Fret$par
                        ret <- .profile(start, fix = seq_len(K - 1))
                        ret$objective <- ret$value
                    }
                    
                }
           }
        }
        if (ret$convergence > 0)
            warning(gettextf("unsuccessful optimisation in %s", "free1way"),
                    ": ", ret$message, domain = NA)

        ret$MPL_Jeffreys <- MPL_Jeffreys
        ret$value <- ret$objective
        ret$objective <- NULL
        
    } else if (length(fix) == length(start)) {
        ret <- list(par = start, 
                    value = fn(start))
    } else {
        ret <- .profile(start, fix = fix)
    }
     
    
    if (is.null(fix) || (length(fix) == length(start)))
        parm <- seq_len(K - 1)
    else 
        parm <- fix
    if (any(parm >= K)) return(ret)

    ret$coefficients <- ret$par[parm]
    dn2 <- dimnames(xt)[2L]
    names(ret$coefficients) <- cnames <- paste0(names(dn2), dn2[[1L]][1L + parm])

    par <- ret$par
    intercepts <- function(parm, x) 
    {

        
        C <- vapply(x, NROW, 0L) ### might differ by stratum
        K <- unique(do.call("c", lapply(x, ncol))) ### the same
        B <- length(x)
        sidx <- factor(rep(seq_len(B), times = pmax(0, C - 1L)), 
                       levels = seq_len(B))
        bidx <- seq_len(K - 1L)
        delta <- parm[bidx]
        intercepts <- split(parm[-bidx], sidx)
        

        return(intercepts)
    }
    ret$intercepts <- intercepts(par, x = xlist)

    if (score) {
        ret$negscore <- .snsc(par, x = xlist, mu = mu)[parm]
        if (!is.null(xrc))
            ret$negscore <- ret$negscore + .snsc(par, x = xrclist, mu = mu, 
                                                 rightcensored = TRUE)[parm]
    }
    if (hessian) {
        if (!is.null(xrc)) {
            ret$hessian <- .shes(par, x = xlist, mu = mu, xrc = xrclist)
        } else {
            ret$hessian <- .shes(par, x = xlist, mu = mu)
        }
        ret$vcov <- solve(ret$hessian)
        if (length(parm) != nrow(ret$hessian))
           ret$hessian <- solve(ret$vcov <- ret$vcov[parm, parm, drop = FALSE])
        rownames(ret$vcov) <- colnames(ret$vcov) <- rownames(ret$hessian) <-
            colnames(ret$hessian) <-  cnames
    }
    if (residuals) {
        ret$negresiduals <- .snsr(par, x = xlist, mu = mu)
        if (!is.null(xrc)) {
            rcr <- .snsr(par, x = xrclist, mu = mu, rightcensored = TRUE)
            ret$negresiduals <- c(rbind(matrix(ret$negresiduals, nrow = C),
                                        matrix(rcr, nrow = C)))
         }
    }
    ret$profile <- function(start, fix)
        .free1wayML(xt, link = link, mu = mu, start = start, fix = fix, tol = tol, 
                    ...) 
    ret$table <- xt

    ret$strata <- strata
    ret$mu <- mu
    if (length(ret$mu) == 1) {
        names(ret$mu) <- link$parm
    } else {
        names(ret$mu) <- c(paste(link$parm, cnames[1L], sep = ":"), cnames[-1L])
    }
    

    class(ret) <- "free1wayML"
    ret
}


.SW <- function(res, xt) 
{

    if (length(dim(xt)) == 3L) {
        res <- matrix(res, nrow = dim(xt)[1L], ncol = dim(xt)[3])
        STAT <-  Exp <- Cov <- 0
        for (b in seq_len(dim(xt)[3L])) {
            sw <- .SW(res[,b, drop = TRUE], xt[,,b, drop = TRUE])
            STAT <- STAT + sw$Statistic
            Exp <- Exp + sw$Expectation
            Cov <- Cov + sw$Covariance
        }
        return(list(Statistic = STAT, Expectation = as.vector(Exp),
                    Covariance = Cov))
    }

    Y <- matrix(res, ncol = 1, nrow = length(xt))
    weights <- c(xt)
    x <- gl(ncol(xt), nrow(xt))
    X <- model.matrix(~ x, data = data.frame(x = x))[,-1L,drop = FALSE]

    w. <- sum(weights)
    wX <- weights * X
    wY <- weights * Y
    ExpX <- colSums(wX)
    ExpY <- colSums(wY) / w.
    CovX <- crossprod(X, wX)
    Yc <- t(t(Y) - ExpY)
    CovY <- crossprod(Yc, weights * Yc) / w.
    Exp <- kronecker(ExpY, ExpX)
    Cov <- w. / (w. - 1) * kronecker(CovY, CovX) -
           1 / (w. - 1) * kronecker(CovY, tcrossprod(ExpX))
    STAT <- crossprod(X, wY)
    list(Statistic = STAT, Expectation = as.vector(Exp),
         Covariance = Cov)
}


.resample <- function(res, xt, B = 10000) 
{

    if (length(dim(xt)) == 2L)
        xt <- as.table(array(xt, dim = c(dim(xt), 1)))

    res <- matrix(res, nrow = dim(xt)[1L], ncol = dim(xt)[3L])
    stat <- 0
    ret <- .SW(res, xt)
    if (dim(xt)[2L] == 2L) {
        ret$testStat <- c((ret$Statistic - ret$Expectation) / 
                          sqrt(c(ret$Covariance)))
    } else {
        ES <- ret$Statistic - ret$Expectation
        ret$testStat <- sum(ES * solve(ret$Covariance, ES))
    }
    ret$DF <- dim(xt)[2L] - 1L

    if (B) {
        for (j in 1:dim(xt)[3L]) {
           rt <- r2dtable(B, r = rowSums(xt[,,j]), c = colSums(xt[,,j]))
           stat <- stat + vapply(rt, 
               function(x) .colSums(x[,-1L, drop = FALSE] * res[,j], 
                                    m = nrow(x), n = ncol(x) - 1L), 
                                 FUN.VALUE = rep(0, dim(xt)[[2L]] - 1L))
        }
        if (dim(xt)[2L] == 2L) {
             ret$permStat <- (stat - ret$Expectation) / 
                              sqrt(c(ret$Covariance))
        } else {
            ES <- matrix(stat, ncol = B) - ret$Expectation
            ret$permStat <- .colSums(ES * solve(ret$Covariance, ES), 
                                     m = dim(xt)[[2L]] - 1L, n = B)
        }
    }
    ret
}


### distribution function
.p <- function(link, q, ...)
    link$linkinv(q = q, ...)

### quantile function
.q <- function(link, p, ...)
    link$link(p = p, ...)

### density function
.d <- function(link, x, ...)
    link$dlinkinv(x = x, ...)

### derivative of density function
.dd <- function(link, x, ...)
    link$ddlinkinv(x = x, ...)

### 2nd derivative of density function
.ddd <- function(link, x, ...)
    link$dddlinkinv(x = x, ...)

### ratio of derivative of density to
### density function
.dd2d <- function(link, x, ...)
    link$dd2dlinkinv(x = x, ...)

### constructor
linkfun <- function(name,	### nickname
                    alias,	### char 
                    model, 	### char, semiparametric model name
                    parm, 	### char, parameter name
                    link,      	### quantile function
                    linkinv,   	### distribution function
                    dlinkinv,  	### density function
                    ddlinkinv, 	### derivative of density function
                    ...) 
{

    ret <- list(name = name, 
                alias = alias,
                model = model,
                parm = parm,
                link = link,
                linkinv = linkinv,
                dlinkinv = dlinkinv,
                ddlinkinv = ddlinkinv)
    if (is.null(ret$dd2d)) 
        ret$dd2d <- function(x) 
            ret$ddlinkinv(x) / ret$dlinkinv(x)
    ret <- c(ret, list(...))
    class(ret) <- "linkfun"
    ret
}


logit <- function()
    linkfun(name = "Logit", 
            alias = c("Wilcoxon", "Kruskal-Wallis"),
            model = "proportional odds", 
            parm = "log-odds ratio",
            link = qlogis,
            linkinv = plogis,
            dlinkinv = dlogis,
            ddlinkinv = function(x) {
                p <- plogis(x)
                p * (1 - p)^2 - p^2 * (1 - p)
            },
            dddlinkinv = function(x) {
                ex <- exp(x)
                ifelse(is.finite(x), (ex - 4 * ex^2 + ex^3) / (1 + ex)^4, 0.0)
            },
            dd2d = function(x) {
                ex <- exp(x)
                (1 - ex) / (1 + ex)
            },
            parm2PI = function(x) {
               OR <- exp(x)
               ret <- OR * (OR - 1 - x)/(OR - 1)^2
               ret[abs(x) < .Machine$double.eps] <- 0.5
               return(ret)
            },
            PI2parm = function(p) {
               f <- function(x, PI)
                   x + (exp(-x) * (PI + 
                                   exp(2 * x) * (PI - 1) + 
                                   exp(x) * (1 - 2 * PI)))
               ret <- vapply(p, function(p) 
                   uniroot(f, PI = p, interval = 50 * c(-1, 1))$root, 0)
               return(ret)
            },
            parm2OVL = function(x) 2 * plogis(-abs(x / 2))
    )


probit <- function()
    linkfun(name = "Probit",
            alias = "van der Waerden normal scores",
            model = "latent normal shift", 
            parm = "generalised Cohen's d",
            link = qnorm,
            linkinv = pnorm,
            dlinkinv = dnorm,
            ddlinkinv = function(x) 
                ifelse(is.finite(x), -dnorm(x = x) * x, 0.0), 
            dddlinkinv = function(x) 
                ifelse(is.finite(x), dnorm(x = x) * (x^2 - 1), 0.0),
            dd2d = function(x) -x,
            parm2PI = function(x) pnorm(x, sd = sqrt(2)),
            PI2parm = function(p) qnorm(p, sd = sqrt(2)),
            parm2OVL = function(x) 2 * pnorm(-abs(x / 2))
    )


cloglog <- function()
    linkfun(name = "Complementary Log-log",
            alias = "Savage",
            model = "proportional hazards", 
            parm = "log-hazard ratio",
            link = function(p, log.p = FALSE) {
                if (log.p) p <- exp(p)
                log(-log1p(- p))
            },
            linkinv = function(q, lower.tail = TRUE, log.p = FALSE) {
                ### p = 1 - exp(-exp(q))
                ret <- exp(-exp(q))
                if (log.p) {
                    if (lower.tail)
                        return(log1p(-ret))
                    return(-exp(q))
                }
                if (lower.tail)
                    return(-expm1(-exp(q)))
                return(ret)
            },
            dlinkinv = function(x) 
                ifelse(is.finite(x), exp(x - exp(x)), 0.0),
            ddlinkinv = function(x) {
                ex <- exp(x)
                ifelse(is.finite(x), (ex - ex^2) / exp(ex), 0.0)
            },
            dddlinkinv = function(x) {
                ex <- exp(x)
                ifelse(is.finite(x), (ex - 3*ex^2 + ex^3) / exp(ex), 0.0)
            },
            dd2d = function(x)
               -expm1(x),
            parm2PI = plogis,
            PI2parm = qlogis,
            parm2OVL = function(x) {
                x <- abs(x)
                ret <- exp(x / (exp(-x) - 1)) - exp(-x / (exp(x) - 1)) + 1 
                ret[abs(x) < .Machine$double.eps] <- 1
                x[] <- ret
                return(x)
            }
    )


loglog <- function()
    linkfun(name = "Log-log",
            alias = "Lehmann", 
            model = "Lehmann", 
            parm = "log-reverse time hazard ratio",
            link = function(p, log.p = FALSE) {
                if (!log.p) p <- log(p)
                -log(-p)
            },
            linkinv = function(q, lower.tail = TRUE, log.p = FALSE) {
                ### p = exp(-exp(-q))
                if (log.p) {
                    if (lower.tail)
                        return(-exp(-q))
                    return(log1p(-exp(-exp(-q))))
                }
                if (lower.tail)
                    return(exp(-exp(-q)))
                -expm1(-exp(-q))
            },
            dlinkinv = function(x) 
                ifelse(is.finite(x), exp(- x - exp(-x)), 0.0),
            ddlinkinv = function(x) {
               ex <- exp(-x)
               ifelse(is.finite(x), exp(-ex - x) * (ex - 1.0), 0.0)
            },
            dddlinkinv = function(x) {
               ex <- exp(-x)
               ifelse(is.finite(x), exp(-x - ex) * (ex - 1)^2 - 
                                    exp(-ex - 2 * x), 
                                    0.0)
            },
            dd2d = function(x) 
                expm1(-x),
            parm2PI = plogis,
            PI2parm = qlogis,
            parm2OVL = function(x) {
                x <- abs(x)
                rt <- exp(-x / (exp(x) - 1))
                ret <- rt^exp(x) + 1 - rt
                ret[abs(x) < .Machine$double.eps] <- 1
                x[] <- ret
                return(x)
            }
    )


### adopted from rms:::lrm.fit
.NewtonRaphson <- function(start, objective, gradient, hessian, 
                           control = list(iter.max = 150, trace = trace, 
                                          objtol = 5e-4, gradtol = 1e-5, 
                                          paramtol = 1e-5, minstepsize = 1e-2, 
                                          tolsolve = .Machine$double.eps),
                           trace = FALSE)
{

    theta  <- start # Initialize the parameter vector
    oldobj <- Inf
    objthe <- objective(theta)
    if (!is.finite(objthe)) {
        msg <- "Infeasible starting values"
        return(list(par = theta, objective = objthe, convergence = 1, 
                    message = msg)) 
    }

    if(!suppressPackageStartupMessages(requireNamespace("Matrix")))
        stop(gettextf("%s needs package 'Matrix' correctly installed",
                      ".NewtonRaphson"),
                 domain = NA)

    for (iter in seq_len(control$iter.max)) {

        
        gradthe <- gradient(theta)     # Compute the gradient vector
        hessthe <- hessian(theta)      # Compute the Hessian matrix

        delta <- Matrix::solve(hessthe, gradthe, tol = control$tolsolve)

        if (control$trace)
            cat(iter, ': ', theta, "\n", sep = "")

        step_size <- 1L                # Initialize step size for step-halving
        

        # Step-halving loop
        while (TRUE) {
            
            new_theta <- theta - step_size * delta # Update parameter vector
            objnew_the <- objective(new_theta)

            if (control$trace)
                cat("Old, new, old - new objective:", 
                    objthe, objnew_the, objthe - objnew_the, "\n")

            # Objective function failed to be reduced or is infinite
            if (!is.finite(objnew_the) || (objnew_the > objthe + 1e-6)) {
                step_size <- step_size / 2         # Reduce the step size

                if (control$trace) 
                    cat("Step size reduced to", step_size, "\n")

                if (step_size <= control$minstepsize) {
                    msg <- paste("Step size ", step_size, 
                                 " has reduced below minstepsize")
                    return(list(par = theta, objective = objthe, convergence = 1, 
                                message = msg)) 
                }
            } else {
                theta  <- new_theta	# accept the new parameter vector
                oldobj <- objthe
                objthe <- objnew_the
                break
            }
            
        }

        
        # Convergence check - must meet 3 criteria
        if ((objthe <= oldobj + 1e-6 && (oldobj - objthe < control$objtol)) &&
            (max(abs(gradthe)) < control$gradtol) &&
            (max(abs(delta)) < control$paramtol))

            return(list(par            = theta,
                        objective      = objthe,
                        convergence    = 0,
                        message        = "Normal convergence"))
         
    }

    msg <- paste("Reached", control$iter.max, "iterations without convergence")
    return(list(par = theta, objective = objthe, convergence = 1, message = msg)) 
}

@

We start implementing the log-likelihood function for parameters \code{parm}
$= \thetavec$ (assuming only a single block) with data from a two-way $C
\times K$ contingency table \code{x}. 

From $\thetavec$, we first extract the shift parameters $\delta_k, k = 2,
\dots, K$ and
then the intercept parameters $\vartheta_{c,1}, c = 1, \dots, C - 1$ and evaluate the probabilities
\code{prb} $ = \Prob(y_{c - 1} < Y \le y_c \mid \rT = k, \rS = 1)$ for all
groups:

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap1}\raggedright\small
\NWtarget{nuweb3a}{} $\langle\,${\itshape parm to prob}\nobreak\ {\footnotesize {3a}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@bidx <- seq_len(ncol(x) - 1L)@\\
\mbox{}\verb@delta <- c(0, mu + parm[bidx])@\\
\mbox{}\verb@intercepts <- c(-Inf, parm[- bidx], Inf)@\\
\mbox{}\verb@tmb <- intercepts - matrix(delta, nrow = length(intercepts),  @\\
\mbox{}\verb@                                  ncol = ncol(x),@\\
\mbox{}\verb@                                  byrow = TRUE)@\\
\mbox{}\verb@Ftmb <- F(tmb)@\\
\mbox{}\verb@if (rightcensored) {@\\
\mbox{}\verb@    prb <- 1 - Ftmb[- nrow(Ftmb), , drop = FALSE]@\\
\mbox{}\verb@} else {@\\
\mbox{}\verb@    prb <- Ftmb[- 1L, , drop = FALSE] - @\\
\mbox{}\verb@           Ftmb[- nrow(Ftmb), , drop = FALSE]@\\
\mbox{}\verb@} @\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb3b}{3b}\NWlink{nuweb4b}{, 4b}\NWlink{nuweb4c}{c}\NWlink{nuweb7}{, 7}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
If the table \code{x} represents right-censored observations, we compute
\code{prb} $ = 1 - \Prob(Y \le y_c \mid  \rT = k, \rS = 1)$.

With default null values $\mu_k = 0, k = 2, \dots, K$, we define the
negative log-likelihood function as the weighted (by number of observations) sum of
the log-probabilities

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap2}\raggedright\small
\NWtarget{nuweb3b}{} $\langle\,${\itshape negative logLik}\nobreak\ {\footnotesize {3b}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@.nll <- function(parm, x, mu = 0, rightcensored = FALSE) @\\
\mbox{}\verb@{@\\
\mbox{}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape parm to prob}\nobreak\ {\footnotesize \NWlink{nuweb3a}{3a}}$\,\rangle$}\verb@@\\
\mbox{}\verb@    if (any(prb < .Machine$double.eps^10)) @\\
\mbox{}\verb@        return(Inf)@\\
\mbox{}\verb@    return(- sum(x * log(prb)))@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb33}{33}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
The code assumes that all elements of the margins of the table \code{x} are larger than
zero; otherwise, the corresponding parameter is not identified. We will
handle such situation at a higher level later on.

It is important to note that, with $F$ corresponding to distribution with
log-concave density $f$, the negative log-likelihood is a convex function of
the parameters $\thetavec$, and thus we can solve the corresponding
constrained minimisation problem quickly and reliably.

Next, we implement the gradient of the negative
log-likelihood, the negative score function for the parameters in
$\thetavec$. The score function for the empirical likelihood, evaluated at
parameters is given in many places
\citep[for example in][Formula~(2)]{HothornMoestBuehlmann2017}. 
We begin computing the ratio of $f(\vartheta_{c,1} -
\delta_k)$ and the corresponding likelihood

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap3}\raggedright\small
\NWtarget{nuweb4a}{} $\langle\,${\itshape density prob ratio}\nobreak\ {\footnotesize {4a}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@ftmb <- f(tmb)@\\
\mbox{}\verb@zu <- x * ftmb[- 1, , drop = FALSE] / prb@\\
\mbox{}\verb@if (rightcensored) zu[] <- 0 ### derivative of a constant@\\
\mbox{}\verb@zl <- x * ftmb[- nrow(ftmb), , drop = FALSE] / prb@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb4b}{4b}\NWlink{nuweb4c}{c}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
and then compute the negative score function:

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap4}\raggedright\small
\NWtarget{nuweb4b}{} $\langle\,${\itshape negative score}\nobreak\ {\footnotesize {4b}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@.nsc <- function(parm, x, mu = 0, rightcensored = FALSE) @\\
\mbox{}\verb@{@\\
\mbox{}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape parm to prob}\nobreak\ {\footnotesize \NWlink{nuweb3a}{3a}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape density prob ratio}\nobreak\ {\footnotesize \NWlink{nuweb4a}{4a}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\\
\mbox{}\verb@    ret <- numeric(length(parm))@\\
\mbox{}\verb@    ret[bidx] <- .colSums(zl, m = nrow(zl), n = ncol(zl))[-1L] -@\\
\mbox{}\verb@                 .colSums(zu[-nrow(zu),,drop = FALSE], @\\
\mbox{}\verb@                          m = nrow(zu) - 1L, n = ncol(zu))[-1L]@\\
\mbox{}\verb@    ret[- bidx] <- .rowSums(zu[-nrow(zu),,drop = FALSE] - @\\
\mbox{}\verb@                            zl[-1,,drop = FALSE], @\\
\mbox{}\verb@                            m = nrow(zu) - 1L, n = ncol(zu))@\\
\mbox{}\verb@    return(- ret)@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb33}{33}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
In addition, we define negative score residuals, that is, the derivative of the
negative log-likelihood with respect to an intercept term constrained to
zero:

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap5}\raggedright\small
\NWtarget{nuweb4c}{} $\langle\,${\itshape negative score residuals}\nobreak\ {\footnotesize {4c}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@.nsr <- function(parm, x, mu = 0, rightcensored = FALSE) @\\
\mbox{}\verb@{@\\
\mbox{}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape parm to prob}\nobreak\ {\footnotesize \NWlink{nuweb3a}{3a}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape density prob ratio}\nobreak\ {\footnotesize \NWlink{nuweb4a}{4a}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\\
\mbox{}\verb@    ret <- .rowSums(zl - zu, m = nrow(zl), n = ncol(zl)) / @\\
\mbox{}\verb@           .rowSums(x, m = nrow(x), n = ncol(x))@\\
\mbox{}\verb@    ret[!is.finite(ret)] <- 0@\\
\mbox{}\verb@    return(- ret)@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb33}{33}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
We also need access to the observed Fisher information of the shift
parameters. We proceed by implementing the Hessian for the intercept
($\vartheta_\cdot$) and shift ($\delta_\cdot$) parameters, as given in Formula~(4) of
\cite{HothornMoestBuehlmann2017} first. This partitioned matrix
\begin{eqnarray*}
\mH(\vartheta_1, \dots, \vartheta_{C - 1}, \delta_2, \dots, \delta_K) = 
\left(\begin{array}{ll}
\mA & \X \\
\X^\top & \Z
\end{array} \right)
\end{eqnarray*}
consists of a symmetric tridiagonal $\mA \sim (C-1,C-1)$, a diagonal $\Z \sim (K - 1, K -
1)$, and a full $\X \sim (C - 1, K - 1)$ matrix. In a second step, we
compute the Fisher information matrix for the shift parameters only by means
of the Schur complement $\Z - \X^\top \mA^{-1} \X$.

In addition to probabilities \code{prb}, the Hessian necessitates the
computation of $f(\vartheta_{c,1} - \delta_k)$ and $f^\prime(\vartheta_{c,1} -
\delta_k)$. We start preparing these objects, keeping in mind to remove terms
not being present under right-censoring:

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap6}\raggedright\small
\NWtarget{nuweb5a}{} $\langle\,${\itshape Hessian prep}\nobreak\ {\footnotesize {5a}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@ftmb <- f(tmb)@\\
\mbox{}\verb@fptmb <- fp(tmb)@\\
\mbox{}\verb@@\\
\mbox{}\verb@dl <- ftmb[- nrow(ftmb), , drop = FALSE]@\\
\mbox{}\verb@du <- ftmb[- 1, , drop = FALSE]@\\
\mbox{}\verb@if (rightcensored) du[] <- 0@\\
\mbox{}\verb@dpl <- fptmb[- nrow(ftmb), , drop = FALSE]@\\
\mbox{}\verb@dpu <- fptmb[- 1, , drop = FALSE]@\\
\mbox{}\verb@if (rightcensored) dpu[] <- 0@\\
\mbox{}\verb@dlm1 <- dl[,-1L, drop = FALSE]@\\
\mbox{}\verb@dum1 <- du[,-1L, drop = FALSE]@\\
\mbox{}\verb@dplm1 <- dpl[,-1L, drop = FALSE]@\\
\mbox{}\verb@dpum1 <- dpu[,-1L, drop = FALSE]@\\
\mbox{}\verb@prbm1 <- prb[,-1L, drop = FALSE]@\\
\mbox{}\verb@@\\
\mbox{}\verb@i1 <- length(intercepts) - 1L@\\
\mbox{}\verb@i2 <- 1L@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb7}{7}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
The off-diagonal elements of $\mA$ are now available as
\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap7}\raggedright\small
\NWtarget{nuweb5b}{} $\langle\,${\itshape off-diagonal elements for Hessian of intercepts}\nobreak\ {\footnotesize {5b}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@Aoffdiag <- - .rowSums(x * du * dl / prb^2, m = nrow(x), n = ncol(x))[-i2]@\\
\mbox{}\verb@Aoffdiag <- Aoffdiag[-length(Aoffdiag)]@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb7}{7}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
and the diagonal elements of $\mA$ as
\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap8}\raggedright\small
\NWtarget{nuweb5c}{} $\langle\,${\itshape diagonal elements for Hessian of intercepts}\nobreak\ {\footnotesize {5c}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@Adiag <- - .rowSums((x * dpu / prb)[-i1,,drop = FALSE] - @\\
\mbox{}\verb@                    (x * dpl / prb)[-i2,,drop = FALSE] - @\\
\mbox{}\verb@                    ((x * du^2 / prb^2)[-i1,,drop = FALSE] + @\\
\mbox{}\verb@                     (x * dl^2 / prb^2)[-i2,,drop = FALSE] ), @\\
\mbox{}\verb@                    m = nrow(x) - length(i1), n = ncol(x)@\\
\mbox{}\verb@                   )@\\
\mbox{}\verb@                  @\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb7}{7}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
For the computation of $\X$ and $\Z$, the observations corresponding to the
control group ($k = 1$) are irrelevant, we remove these first

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap9}\raggedright\small
\NWtarget{nuweb6}{} $\langle\,${\itshape intercept / shift contributions to Hessian}\nobreak\ {\footnotesize {6}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@xm1 <- x[,-1L,drop = FALSE] @\\
\mbox{}\verb@X <- ((xm1 * dpum1 / prbm1)[-i1,,drop = FALSE] - @\\
\mbox{}\verb@      (xm1 * dplm1 / prbm1)[-i2,,drop = FALSE] - @\\
\mbox{}\verb@      ((xm1 * dum1^2 / prbm1^2)[-i1,,drop = FALSE] - @\\
\mbox{}\verb@       (xm1 * dum1 * dlm1 / prbm1^2)[-i2,,drop = FALSE] -@\\
\mbox{}\verb@       (xm1 * dum1 * dlm1 / prbm1^2)[-i1,,drop = FALSE] +@\\
\mbox{}\verb@       (xm1 * dlm1^2 / prbm1^2)[-i2,,drop = FALSE]@\\
\mbox{}\verb@      )@\\
\mbox{}\verb@     )@\\
\mbox{}\verb@@\\
\mbox{}\verb@Z <- - .colSums(xm1 * (dpum1 / prbm1 - @\\
\mbox{}\verb@                       dplm1 / prbm1 -@\\
\mbox{}\verb@                       (dum1^2 / prbm1^2 - @\\
\mbox{}\verb@                        2 * dum1 * dlm1 / prbm1^2 +@\\
\mbox{}\verb@                        dlm1^2 / prbm1^2@\\
\mbox{}\verb@                       )@\\
\mbox{}\verb@                      ),@\\
\mbox{}\verb@                m = nrow(xm1), n = ncol(xm1)@\\
\mbox{}\verb@                )@\\
\mbox{}\verb@if (length(Z) > 1L) Z <- diag(Z)@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb7}{7}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
We return the three matrices $\mA$, $\X$, and $\Z$ necessary for two
different purposes: We need the \code{full} Hessian for all parameters $\thetavec$ as a
dense \code{matrix} such that \code{nlminb} can compute updates from this
object. In addition, the
computation of the Fisher information for $\delta_2, \dots, \delta_K$ as the Schur
complement $\Z - \X^\top \mA^{-1} \X$. Because the matrix $\mA$ is symmetric
tridiagonal, we use infrastructure from the \pkg{Matrix} package to
represent this matrix:

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap10}\raggedright\small
\NWtarget{nuweb7}{} $\langle\,${\itshape Hessian}\nobreak\ {\footnotesize {7}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@.hes <- function(parm, x, mu = 0, rightcensored = FALSE, full = FALSE) @\\
\mbox{}\verb@{@\\
\mbox{}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape parm to prob}\nobreak\ {\footnotesize \NWlink{nuweb3a}{3a}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape Hessian prep}\nobreak\ {\footnotesize \NWlink{nuweb5a}{5a}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape off-diagonal elements for Hessian of intercepts}\nobreak\ {\footnotesize \NWlink{nuweb5b}{5b}}$\,\rangle$}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape diagonal elements for Hessian of intercepts}\nobreak\ {\footnotesize \NWlink{nuweb5c}{5c}}$\,\rangle$}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape intercept / shift contributions to Hessian}\nobreak\ {\footnotesize \NWlink{nuweb6}{6}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\\
\mbox{}\verb@    if (length(Adiag) > 1L) {@\\
\mbox{}\verb@        if (!isFALSE(full)) {@\\
\mbox{}\verb@            A <- list(Adiag = Adiag, Aoffdiag = Aoffdiag)@\\
\mbox{}\verb@        } else {@\\
\mbox{}\verb@            A <- Matrix::bandSparse(length(Adiag), @\\
\mbox{}\verb@                k = 0:1, diagonals = list(Adiag, Aoffdiag), @\\
\mbox{}\verb@                symmetric = TRUE)@\\
\mbox{}\verb@        }@\\
\mbox{}\verb@    } else {@\\
\mbox{}\verb@        if (!isFALSE(full)) {@\\
\mbox{}\verb@            A <- list(Adiag = Adiag, Aoffdiag = NULL)@\\
\mbox{}\verb@        } else {@\\
\mbox{}\verb@            A <- matrix(Adiag)@\\
\mbox{}\verb@        }@\\
\mbox{}\verb@    }@\\
\mbox{}\verb@    return(list(A = A, X = X, Z = Z))@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb33}{33}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
We start with an example involving $K = 3$ groups for a binary outcome and
use a binary logistic regression model to estimate the two log-odds ratios
$\delta_2$ and $\delta_3$ along with their estimated covariance
<<glm>>=
library("free1way.docreg")
(x <- matrix(c(10, 5, 7, 11, 8, 9), nrow = 2))
d <- expand.grid(y = relevel(gl(2, 1), "2"), t = gl(3, 1))
d$x <- c(x)
m <- glm(y ~ t, data = d, weights = x, family = binomial())
(cf <- coef(m))
@

Replicating these results requires specification of the inverse link
function $F = \text{expit}$ and the density function $f$ of the standard
logistic. We use \code{optim} with numerically approximated Hessian to be
able to check the correctness of the analytical Hessian. 
Note that \code{glm} operates with a positive linear predictor, so
we need to change the sign of the log-odds ratios:

<<glm-op>>=
F <- plogis
f <- dlogis
op <- optim(par = c("mt2" = 0, "mt3" = 0, "(Intercept)" = 0), 
            fn = .nll, gr = .nsc, 
            x = x, method = "BFGS", hessian = TRUE)
cbind(glm = c(cf[-1] * -1, cf[1]), free1way = op$par)
logLik(m)
-op$value
@

Parameter estimates and the in-sample log-likelihood are practically
identical. We now turn to the inverse Hessian of the shift terms, first
defining the derivative of the density of the standard logistic distribution
<<glm-H>>=
fp <- function(x) 
{
    p <- plogis(x)
    p * (1 - p)^2 - p^2 * (1 - p)
}
H <- .hes(op$par, x)
### analytical covariance of parameters
solve(H$Z - crossprod(H$X, Matrix::solve(H$A, H$X)))
### numerical covariance
solve(op$hessian)[1:2,1:2]
### from glm
vcov(m)[-1,-1]
@
Also here we see practically identical results. We will later implement a
low-level function \code{.free1way} taking a table and an object describing the inverse link
$F$ as arguments; these results are also in line with \code{glm}:
<<glm-free1way>>=
obj <- .free1wayML(as.table(x), link = logit())
obj$coefficients
-obj$value
### analytical covariance
obj$vcov
@

In the next step, we extend our results to the stratified case. We iterate
over all blocks and evaluate the negative log-likelihood for the same values
of the shift parameters but block-specific values of the intercept
parameters. Because \code{x} is a \code{table}, it may happen (especially in
the presence of blocks) that some outcome values (rows) were not observed
in any group (row sum is zero). Thus, the distribution function does not
jump at this value and therefore no parameter for this value is needed. We
remove these cases. In the right-censored case, we have to pay attention to
censoring happening at these outcome values. We count how many obervations
were censored between observations and assign the corresponding weights 
to the subsequent outcome value. Furthermore, we need to be able to undo the
removal of observations later, mainly for computing residuals. We store an
attribute \code{idx} for later use in \code{.snsr} on page
\pageref{lab:snsr}.

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap11}\raggedright\small
\NWtarget{nuweb9}{} $\langle\,${\itshape determine steps in blocks}\nobreak\ {\footnotesize {9}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@xlist <- xrclist <- vector(mode = "list", length = B)@\\
\mbox{}\verb@@\\
\mbox{}\verb@for (b in seq_len(B)) {@\\
\mbox{}\verb@    xb <- matrix(x[,,b, drop = TRUE], ncol = K)@\\
\mbox{}\verb@    xw <- rowSums(abs(xb)) > 0@\\
\mbox{}\verb@    if (sum(xw) > 1L) {@\\
\mbox{}\verb@        ### do not remove last parameter if there are corresponding@\\
\mbox{}\verb@        ### right-censored observations@\\
\mbox{}\verb@        wm <- which(xw)[sum(xw)]@\\
\mbox{}\verb@        if (!is.null(xrc) && any(xrc[wm:dx[1],,b,drop = TRUE] > 0))@\\
\mbox{}\verb@            xw[length(xw)] <- TRUE@\\
\mbox{}\verb@        xlist[[b]] <- xb[xw,,drop = FALSE]@\\
\mbox{}\verb@        Cidx <- rep.int(1L, times = C)@\\
\mbox{}\verb@        Cidx[xw] <- Cidx[xw] + seq_len(sum(xw))@\\
\mbox{}\verb@        attr(xlist[[b]], "idx") <- Cidx@\\
\mbox{}\verb@        if (!is.null(xrc)) {@\\
\mbox{}\verb@            ### count right-censored observations between distinct event@\\
\mbox{}\verb@            ### times@\\
\mbox{}\verb@            cs <- apply(xrc[,,b,drop = TRUE] * (!xw), 2, function(x) @\\
\mbox{}\verb@                diff(c(0, cumsum(x)[xw])))@\\
\mbox{}\verb@            xrclist[[b]] <- matrix(xrc[xw,,b,drop = TRUE], ncol = K) + cs@\\
\mbox{}\verb@            idx <- seq_len(C)[xw]@\\
\mbox{}\verb@            idx <- rep(seq_len(sum(xw)), times = c(idx[1], diff(idx)))@\\
\mbox{}\verb@            Cidx <- rep.int(1L, times = C)@\\
\mbox{}\verb@            Cidx[seq_along(idx)] <- Cidx[seq_along(idx)] + idx@\\
\mbox{}\verb@            attr(xrclist[[b]], "idx") <- Cidx@\\
\mbox{}\verb@        }@\\
\mbox{}\verb@    }@\\
\mbox{}\verb@}@\\
\mbox{}\verb@### remove empty blocks@\\
\mbox{}\verb@strata <- !vapply(xlist, is.null, NA)@\\
\mbox{}\verb@xlist <- xlist[strata]@\\
\mbox{}\verb@xrclist <- xrclist[strata]@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb10a}{10a}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
Before we begin, we convert the table $C \times K \times B
(\times 2)$ table \code{x} into a list of non-empty $C^\prime \times K$
tables (yet still allowing zero row sums):

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap12}\raggedright\small
\NWtarget{nuweb10a}{} $\langle\,${\itshape table2list body}\nobreak\ {\footnotesize {10a}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@dx <- dim(x)@\\
\mbox{}\verb@if (length(dx) == 1L)@\\
\mbox{}\verb@    stop("incorrect dimensions")@\\
\mbox{}\verb@if (length(dx) == 2L)@\\
\mbox{}\verb@    x <- as.table(array(x, dim = c(dx, 1)))@\\
\mbox{}\verb@dx <- dim(x)@\\
\mbox{}\verb@if (length(dx) < 3L)@\\
\mbox{}\verb@    stop("incorrect dimensions")@\\
\mbox{}\verb@C <- dim(x)[1L]@\\
\mbox{}\verb@K <- dim(x)[2L]@\\
\mbox{}\verb@B <- dim(x)[3L]@\\
\mbox{}\verb@if (C < 2L)@\\
\mbox{}\verb@    stop("at least two response categories required")@\\
\mbox{}\verb@if (K < 2L)@\\
\mbox{}\verb@    stop("at least two groups required")@\\
\mbox{}\verb@xrc <- NULL@\\
\mbox{}\verb@if (length(dx) == 4L) {@\\
\mbox{}\verb@    if (dx[4] == 2L) {@\\
\mbox{}\verb@        xrc <- array(x[,,,"FALSE", drop = TRUE], dim = dx[1:3])@\\
\mbox{}\verb@        x <- array(x[,,,"TRUE", drop = TRUE], dim = dx[1:3])@\\
\mbox{}\verb@    } else {@\\
\mbox{}\verb@        stop(gettextf("%s currently only allows independent right-censoring",@\\
\mbox{}\verb@                      "free1way"),@\\
\mbox{}\verb@             domain = NA)@\\
\mbox{}\verb@    }@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@\\
\mbox{}\verb@@\hbox{$\langle\,${\itshape determine steps in blocks}\nobreak\ {\footnotesize \NWlink{nuweb9}{9}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb26}{26}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
We first extract the shift parameters $\delta_{\cdot}$ and then, separately
for each stratum, the corresponding contrasts of the intercept parameters:

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap13}\raggedright\small
\NWtarget{nuweb10b}{} $\langle\,${\itshape stratum prep}\nobreak\ {\footnotesize {10b}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@C <- vapply(x, NROW, 0L) ### might differ by stratum@\\
\mbox{}\verb@K <- unique(do.call("c", lapply(x, ncol))) ### the same@\\
\mbox{}\verb@B <- length(x)@\\
\mbox{}\verb@sidx <- factor(rep(seq_len(B), times = pmax(0, C - 1L)), @\\
\mbox{}\verb@               levels = seq_len(B))@\\
\mbox{}\verb@bidx <- seq_len(K - 1L)@\\
\mbox{}\verb@delta <- parm[bidx]@\\
\mbox{}\verb@intercepts <- split(parm[-bidx], sidx)@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb11a}{11a}\NWlink{nuweb11b}{b}\NWlink{nuweb11c}{c}\NWlink{nuweb14}{, 14}\NWlink{nuweb31}{, 31}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
before we loop over the non-empty strata and return the sum of the
corresponding log-likelihoods:

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap14}\raggedright\small
\NWtarget{nuweb11a}{} $\langle\,${\itshape stratified negative logLik}\nobreak\ {\footnotesize {11a}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@.snll <- function(parm, x, mu = 0, rightcensored = FALSE) @\\
\mbox{}\verb@{@\\
\mbox{}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape stratum prep}\nobreak\ {\footnotesize \NWlink{nuweb10b}{10b}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\\
\mbox{}\verb@    ret <- 0@\\
\mbox{}\verb@    for (b in seq_len(B))@\\
\mbox{}\verb@        ret <- ret + .nll(c(delta, intercepts[[b]]), x[[b]], mu = mu,@\\
\mbox{}\verb@                          rightcensored = rightcensored)@\\
\mbox{}\verb@    return(ret)@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb33}{33}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
In a similar way, we evaluate the gradients for each block and sum-up the
contributions by the shift parameters whereas the gradients for the
intercept parameters are only concatenated:

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap15}\raggedright\small
\NWtarget{nuweb11b}{} $\langle\,${\itshape stratified negative score}\nobreak\ {\footnotesize {11b}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@.snsc <- function(parm, x, mu = 0, rightcensored = FALSE) @\\
\mbox{}\verb@{@\\
\mbox{}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape stratum prep}\nobreak\ {\footnotesize \NWlink{nuweb10b}{10b}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\\
\mbox{}\verb@    ret <- numeric(length(bidx))@\\
\mbox{}\verb@    for (b in seq_len(B)) {@\\
\mbox{}\verb@        nsc <- .nsc(c(delta, intercepts[[b]]), x[[b]], mu = mu,@\\
\mbox{}\verb@                    rightcensored = rightcensored)@\\
\mbox{}\verb@        ret[bidx] <- ret[bidx] + nsc[bidx]@\\
\mbox{}\verb@        ret <- c(ret, nsc[-bidx])@\\
\mbox{}\verb@    }@\\
\mbox{}\verb@    return(ret)@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb33}{33}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
The score residuum is zero for an observation with weight zero, that is, a
row of zeros in the table: \label{lab:snsr}

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap16}\raggedright\small
\NWtarget{nuweb11c}{} $\langle\,${\itshape stratified negative score residual}\nobreak\ {\footnotesize {11c}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@.snsr <- function(parm, x, mu = 0, rightcensored = FALSE) @\\
\mbox{}\verb@{@\\
\mbox{}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape stratum prep}\nobreak\ {\footnotesize \NWlink{nuweb10b}{10b}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\\
\mbox{}\verb@    ret <- c()@\\
\mbox{}\verb@    for (b in seq_len(B)) {@\\
\mbox{}\verb@        idx <- attr(x[[b]], "idx")@\\
\mbox{}\verb@        ### idx == 1L means zero residual, see definition of idx@\\
\mbox{}\verb@        sr <- c(0, .nsr(c(delta, intercepts[[b]]), x[[b]], mu = mu,@\\
\mbox{}\verb@                        rightcensored = rightcensored))@\\
\mbox{}\verb@        ret <- c(ret, sr[idx])@\\
\mbox{}\verb@    }@\\
\mbox{}\verb@    return(ret)@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb33}{33}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
<<glm-stratum>>=
(x <- as.table(array(c(10, 5, 7, 11, 8, 9,
                        9, 4, 8, 15, 5, 4), dim = c(2, 3, 2))))
d <- expand.grid(y = relevel(gl(2, 1), "2"), t = gl(3, 1), s = gl(2, 1))
d$x <- c(x)
m <- glm(y ~ 0 + s + t, data = d, weights = x, family = binomial())
logLik(m)
(cf <- coef(m))
@

<<glm-op-stratum>>=
xl <- .table2list(x)$xlist
op <- optim(par = c("mt2" = 0, "mt3" = 0, "(Intercept 1)" = 0, "(Intercept 2)" = 0), 
            fn = .snll, gr = .snsc, 
            x = xl, 
            method = "BFGS", 
            hessian = TRUE)
cbind(glm = c(cf[-(1:2)] * -1, cf[1:2]), free1way = op$par)
logLik(m)
-op$value
@

For the analytical Hessian, we sum-up over the stratum-specific
Hessians of the shift parameters. For right-censored observations, we need
to compute the contributions by the events and obtain the joint Hessian for
shift- and intercept parameters first. We differentiate between computing
the null Hessian for $\thetavec$ as a dense \code{matrix}:

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap17}\raggedright\small
\NWtarget{nuweb13}{} $\langle\,${\itshape full Hessian}\nobreak\ {\footnotesize {13}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@for (b in seq_len(B)) {@\\
\mbox{}\verb@    H <- .hes(c(delta, intercepts[[b]]), x[[b]], mu = mu, full = full)@\\
\mbox{}\verb@    if (!is.null(xrc)) {@\\
\mbox{}\verb@        Hrc <- .hes(c(delta, intercepts[[b]]), xrc[[b]], mu = mu, @\\
\mbox{}\verb@                    rightcensored = TRUE, full = full)@\\
\mbox{}\verb@        H$X <- H$X + Hrc$X@\\
\mbox{}\verb@        H$A$Adiag <- H$A$Adiag + Hrc$A$Adiag@\\
\mbox{}\verb@        H$A$Aoffdiag <- H$A$Aoffdiag + Hrc$A$Aoffdiag@\\
\mbox{}\verb@        H$Z <- H$Z + Hrc$Z@\\
\mbox{}\verb@    }@\\
\mbox{}\verb@    if (b == 1L) {@\\
\mbox{}\verb@        Adiag <- H$A$Adiag@\\
\mbox{}\verb@        Aoffdiag <- H$A$Aoffdiag@\\
\mbox{}\verb@        X <- H$X@\\
\mbox{}\verb@        Z <- H$Z@\\
\mbox{}\verb@    } else {@\\
\mbox{}\verb@        Adiag <- c(Adiag, H$A$Adiag)@\\
\mbox{}\verb@        Aoffdiag <- c(Aoffdiag, 0, H$A$Aoffdiag)@\\
\mbox{}\verb@        X <- rbind(X, H$X)@\\
\mbox{}\verb@        Z <- Z + H$Z@\\
\mbox{}\verb@    }@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@\\
\mbox{}\verb@if (length(Adiag) > 1L) {@\\
\mbox{}\verb@    A <- Matrix::bandSparse(length(Adiag),@\\
\mbox{}\verb@                            k = 0:1, diagonals = list(Adiag, Aoffdiag),@\\
\mbox{}\verb@                            symmetric = TRUE)@\\
\mbox{}\verb@} else {@\\
\mbox{}\verb@    A <- matrix(Adiag)@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@\\
\mbox{}\verb@ret <- cbind(Z, t(X))@\\
\mbox{}\verb@ret <- rbind(ret, cbind(X, A))@\\
\mbox{}\verb@if (retMatrix) return(ret)@\\
\mbox{}\verb@return(as.matrix(ret))@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb14}{14}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
and the computation of the Hessian for the shift parameters using
\code{Matrix} technology:

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap18}\raggedright\small
\NWtarget{nuweb14}{} $\langle\,${\itshape stratified Hessian}\nobreak\ {\footnotesize {14}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@.shes <- function(parm, x, mu = 0, xrc = NULL, full = FALSE, @\\
\mbox{}\verb@                  retMatrix = FALSE) @\\
\mbox{}\verb@{@\\
\mbox{}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape stratum prep}\nobreak\ {\footnotesize \NWlink{nuweb10b}{10b}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\\
\mbox{}\verb@    if (!isFALSE(ret <- full)) {@\\
\mbox{}\verb@        @\hbox{$\langle\,${\itshape full Hessian}\nobreak\ {\footnotesize \NWlink{nuweb13}{13}}$\,\rangle$}\verb@@\\
\mbox{}\verb@    }@\\
\mbox{}\verb@    ret <- matrix(0, nrow = length(bidx), ncol = length(bidx))@\\
\mbox{}\verb@    for (b in seq_len(B)) {@\\
\mbox{}\verb@        H <- .hes(c(delta, intercepts[[b]]), x[[b]], mu = mu)@\\
\mbox{}\verb@        if (!is.null(xrc)) {@\\
\mbox{}\verb@            Hrc <- .hes(c(delta, intercepts[[b]]), xrc[[b]], mu = mu, @\\
\mbox{}\verb@                        rightcensored = TRUE)@\\
\mbox{}\verb@            H$X <- H$X + Hrc$X@\\
\mbox{}\verb@            H$A <- H$A + Hrc$A@\\
\mbox{}\verb@            H$Z <- H$Z + Hrc$Z@\\
\mbox{}\verb@        }@\\
\mbox{}\verb@        sAH <- tryCatch(Matrix::solve(H$A, H$X), error = function(e) NULL)@\\
\mbox{}\verb@        if (is.null(sAH))@\\
\mbox{}\verb@            stop(gettextf("error computing the Hessian in %s",@\\
\mbox{}\verb@                          "free1way"),@\\
\mbox{}\verb@                 domain = NA)@\\
\mbox{}\verb@        ret <- ret + (H$Z - crossprod(H$X, sAH))@\\
\mbox{}\verb@    }@\\
\mbox{}\verb@    as.matrix(ret)@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb33}{33}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
<<glm-H-stratum>>=
### analytical covariance of parameters
solve(.shes(op$par, xl))
### numerical covariance
solve(op$hessian)[1:2,1:2]
### from glm
vcov(m)[-(1:2),-(1:2)]
@

<<glm-free1way-strata>>=
obj <- .free1wayML(as.table(x), link = logit())
obj$coefficients
-obj$value
### analytical covariance
obj$vcov
@
	

\chapter{Link Functions}
\label{ch:link}

Similar to \code{family} objects, we provide some infrastructure for
\code{link} functions $F^{-1}$ and derived quantities (\code{linkinv} $F$,
\code{dlinkinv} $f$, and \code{ddlinkinv} $f^\prime$). If not provided, we also
set-up the ratio $f^\prime / f$ in the constructor.

Although there is some overlap with \code{family} objects for binomial
outcomes, it doesn't seem beneficial to extend this richer class.

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap19}\raggedright\small
\NWtarget{nuweb16}{} \verb@"linkfun.R"@\nobreak\ {\footnotesize {16}}$\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@#  File src/library/stats/R/linkfun.R@\\
\mbox{}\verb@#  Part of the R package, https://www.R-project.org@\\
\mbox{}\verb@#@\\
\mbox{}\verb@#  Copyright (C) 2026 The R Core Team@\\
\mbox{}\verb@#@\\
\mbox{}\verb@#  This program is free software; you can redistribute it and/or modify@\\
\mbox{}\verb@#  it under the terms of the GNU General Public License as published by@\\
\mbox{}\verb@#  the Free Software Foundation; either version 2 of the License, or@\\
\mbox{}\verb@#  (at your option) any later version.@\\
\mbox{}\verb@#@\\
\mbox{}\verb@#  This program is distributed in the hope that it will be useful,@\\
\mbox{}\verb@#  but WITHOUT ANY WARRANTY; without even the implied warranty of@\\
\mbox{}\verb@#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the@\\
\mbox{}\verb@#  GNU General Public License for more details.@\\
\mbox{}\verb@#@\\
\mbox{}\verb@#  A copy of the GNU General Public License is available at@\\
\mbox{}\verb@#  https://www.R-project.org/Licenses/@\\
\mbox{}\verb@@\\
\mbox{}\verb@@\hbox{$\langle\,${\itshape linkfun}\nobreak\ {\footnotesize \NWlink{nuweb17}{17}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\hbox{$\langle\,${\itshape logit}\nobreak\ {\footnotesize \NWlink{nuweb18}{18}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\hbox{$\langle\,${\itshape probit}\nobreak\ {\footnotesize \NWlink{nuweb21}{21}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\hbox{$\langle\,${\itshape cloglog}\nobreak\ {\footnotesize \NWlink{nuweb20}{20}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\hbox{$\langle\,${\itshape loglog}\nobreak\ {\footnotesize \NWlink{nuweb19}{19}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap20}\raggedright\small
\NWtarget{nuweb17}{} $\langle\,${\itshape linkfun}\nobreak\ {\footnotesize {17}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@### distribution function@\\
\mbox{}\verb@.p <- function(link, q, ...)@\\
\mbox{}\verb@    link$linkinv(q = q, ...)@\\
\mbox{}\verb@@\\
\mbox{}\verb@### quantile function@\\
\mbox{}\verb@.q <- function(link, p, ...)@\\
\mbox{}\verb@    link$link(p = p, ...)@\\
\mbox{}\verb@@\\
\mbox{}\verb@### density function@\\
\mbox{}\verb@.d <- function(link, x, ...)@\\
\mbox{}\verb@    link$dlinkinv(x = x, ...)@\\
\mbox{}\verb@@\\
\mbox{}\verb@### derivative of density function@\\
\mbox{}\verb@.dd <- function(link, x, ...)@\\
\mbox{}\verb@    link$ddlinkinv(x = x, ...)@\\
\mbox{}\verb@@\\
\mbox{}\verb@### 2nd derivative of density function@\\
\mbox{}\verb@.ddd <- function(link, x, ...)@\\
\mbox{}\verb@    link$dddlinkinv(x = x, ...)@\\
\mbox{}\verb@@\\
\mbox{}\verb@### ratio of derivative of density to@\\
\mbox{}\verb@### density function@\\
\mbox{}\verb@.dd2d <- function(link, x, ...)@\\
\mbox{}\verb@    link$dd2dlinkinv(x = x, ...)@\\
\mbox{}\verb@@\\
\mbox{}\verb@### constructor@\\
\mbox{}\verb@linkfun <- function(name,       ### nickname@\\
\mbox{}\verb@                    alias,      ### char @\\
\mbox{}\verb@                    model,      ### char, semiparametric model name@\\
\mbox{}\verb@                    parm,       ### char, parameter name@\\
\mbox{}\verb@                    link,       ### quantile function@\\
\mbox{}\verb@                    linkinv,    ### distribution function@\\
\mbox{}\verb@                    dlinkinv,   ### density function@\\
\mbox{}\verb@                    ddlinkinv,  ### derivative of density function@\\
\mbox{}\verb@                    ...) @\\
\mbox{}\verb@{@\\
\mbox{}\verb@@\\
\mbox{}\verb@    ret <- list(name = name, @\\
\mbox{}\verb@                alias = alias,@\\
\mbox{}\verb@                model = model,@\\
\mbox{}\verb@                parm = parm,@\\
\mbox{}\verb@                link = link,@\\
\mbox{}\verb@                linkinv = linkinv,@\\
\mbox{}\verb@                dlinkinv = dlinkinv,@\\
\mbox{}\verb@                ddlinkinv = ddlinkinv)@\\
\mbox{}\verb@    if (is.null(ret$dd2d)) @\\
\mbox{}\verb@        ret$dd2d <- function(x) @\\
\mbox{}\verb@            ret$ddlinkinv(x) / ret$dlinkinv(x)@\\
\mbox{}\verb@    ret <- c(ret, list(...))@\\
\mbox{}\verb@    class(ret) <- "linkfun"@\\
\mbox{}\verb@    ret@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb16}{16}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
We start with the logit link, that is $F(z) = (1 + \exp(-z))^{-1}$, giving rise
to Wilcoxon or Kruskal-Wallis type score residuals:

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap21}\raggedright\small
\NWtarget{nuweb18}{} $\langle\,${\itshape logit}\nobreak\ {\footnotesize {18}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@logit <- function()@\\
\mbox{}\verb@    linkfun(name = "Logit", @\\
\mbox{}\verb@            alias = c("Wilcoxon", "Kruskal-Wallis"),@\\
\mbox{}\verb@            model = "proportional odds", @\\
\mbox{}\verb@            parm = "log-odds ratio",@\\
\mbox{}\verb@            link = qlogis,@\\
\mbox{}\verb@            linkinv = plogis,@\\
\mbox{}\verb@            dlinkinv = dlogis,@\\
\mbox{}\verb@            ddlinkinv = function(x) {@\\
\mbox{}\verb@                p <- plogis(x)@\\
\mbox{}\verb@                p * (1 - p)^2 - p^2 * (1 - p)@\\
\mbox{}\verb@            },@\\
\mbox{}\verb@            dddlinkinv = function(x) {@\\
\mbox{}\verb@                ex <- exp(x)@\\
\mbox{}\verb@                ifelse(is.finite(x), (ex - 4 * ex^2 + ex^3) / (1 + ex)^4, 0.0)@\\
\mbox{}\verb@            },@\\
\mbox{}\verb@            dd2d = function(x) {@\\
\mbox{}\verb@                ex <- exp(x)@\\
\mbox{}\verb@                (1 - ex) / (1 + ex)@\\
\mbox{}\verb@            },@\\
\mbox{}\verb@            parm2PI = function(x) {@\\
\mbox{}\verb@               OR <- exp(x)@\\
\mbox{}\verb@               ret <- OR * (OR - 1 - x)/(OR - 1)^2@\\
\mbox{}\verb@               ret[abs(x) < .Machine$double.eps] <- 0.5@\\
\mbox{}\verb@               return(ret)@\\
\mbox{}\verb@            },@\\
\mbox{}\verb@            PI2parm = function(p) {@\\
\mbox{}\verb@               f <- function(x, PI)@\\
\mbox{}\verb@                   x + (exp(-x) * (PI + @\\
\mbox{}\verb@                                   exp(2 * x) * (PI - 1) + @\\
\mbox{}\verb@                                   exp(x) * (1 - 2 * PI)))@\\
\mbox{}\verb@               ret <- vapply(p, function(p) @\\
\mbox{}\verb@                   uniroot(f, PI = p, interval = 50 * c(-1, 1))$root, 0)@\\
\mbox{}\verb@               return(ret)@\\
\mbox{}\verb@            },@\\
\mbox{}\verb@            parm2OVL = function(x) 2 * plogis(-abs(x / 2))@\\
\mbox{}\verb@    )@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb16}{16}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
The \code{parm2PI} function converts log-odds ratios to probabilistic
indices (or AUCs) and the inverse operation is implemented by
\code{PI2parm}. The overlap coefficient can be obtained from a log-odds
ratio via \code{parm2OVL}.

The log-log link, with $F(z) = \exp(-\exp(-z))$, is used to construct tests
against Lehmann alternatives:

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap22}\raggedright\small
\NWtarget{nuweb19}{} $\langle\,${\itshape loglog}\nobreak\ {\footnotesize {19}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@loglog <- function()@\\
\mbox{}\verb@    linkfun(name = "Log-log",@\\
\mbox{}\verb@            alias = "Lehmann", @\\
\mbox{}\verb@            model = "Lehmann", @\\
\mbox{}\verb@            parm = "log-reverse time hazard ratio",@\\
\mbox{}\verb@            link = function(p, log.p = FALSE) {@\\
\mbox{}\verb@                if (!log.p) p <- log(p)@\\
\mbox{}\verb@                -log(-p)@\\
\mbox{}\verb@            },@\\
\mbox{}\verb@            linkinv = function(q, lower.tail = TRUE, log.p = FALSE) {@\\
\mbox{}\verb@                ### p = exp(-exp(-q))@\\
\mbox{}\verb@                if (log.p) {@\\
\mbox{}\verb@                    if (lower.tail)@\\
\mbox{}\verb@                        return(-exp(-q))@\\
\mbox{}\verb@                    return(log1p(-exp(-exp(-q))))@\\
\mbox{}\verb@                }@\\
\mbox{}\verb@                if (lower.tail)@\\
\mbox{}\verb@                    return(exp(-exp(-q)))@\\
\mbox{}\verb@                -expm1(-exp(-q))@\\
\mbox{}\verb@            },@\\
\mbox{}\verb@            dlinkinv = function(x) @\\
\mbox{}\verb@                ifelse(is.finite(x), exp(- x - exp(-x)), 0.0),@\\
\mbox{}\verb@            ddlinkinv = function(x) {@\\
\mbox{}\verb@               ex <- exp(-x)@\\
\mbox{}\verb@               ifelse(is.finite(x), exp(-ex - x) * (ex - 1.0), 0.0)@\\
\mbox{}\verb@            },@\\
\mbox{}\verb@            dddlinkinv = function(x) {@\\
\mbox{}\verb@               ex <- exp(-x)@\\
\mbox{}\verb@               ifelse(is.finite(x), exp(-x - ex) * (ex - 1)^2 - @\\
\mbox{}\verb@                                    exp(-ex - 2 * x), @\\
\mbox{}\verb@                                    0.0)@\\
\mbox{}\verb@            },@\\
\mbox{}\verb@            dd2d = function(x) @\\
\mbox{}\verb@                expm1(-x),@\\
\mbox{}\verb@            parm2PI = plogis,@\\
\mbox{}\verb@            PI2parm = qlogis,@\\
\mbox{}\verb@            parm2OVL = function(x) {@\\
\mbox{}\verb@                x <- abs(x)@\\
\mbox{}\verb@                rt <- exp(-x / (exp(x) - 1))@\\
\mbox{}\verb@                ret <- rt^exp(x) + 1 - rt@\\
\mbox{}\verb@                ret[abs(x) < .Machine$double.eps] <- 1@\\
\mbox{}\verb@                x[] <- ret@\\
\mbox{}\verb@                return(x)@\\
\mbox{}\verb@            }@\\
\mbox{}\verb@    )@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb16}{16}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
The complementary log-log link, with $F(z) = 1 - \exp(-\exp(z))$, provides
log-rank or Savage score residuals against proportional hazards
alternatives:

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap23}\raggedright\small
\NWtarget{nuweb20}{} $\langle\,${\itshape cloglog}\nobreak\ {\footnotesize {20}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@cloglog <- function()@\\
\mbox{}\verb@    linkfun(name = "Complementary Log-log",@\\
\mbox{}\verb@            alias = "Savage",@\\
\mbox{}\verb@            model = "proportional hazards", @\\
\mbox{}\verb@            parm = "log-hazard ratio",@\\
\mbox{}\verb@            link = function(p, log.p = FALSE) {@\\
\mbox{}\verb@                if (log.p) p <- exp(p)@\\
\mbox{}\verb@                log(-log1p(- p))@\\
\mbox{}\verb@            },@\\
\mbox{}\verb@            linkinv = function(q, lower.tail = TRUE, log.p = FALSE) {@\\
\mbox{}\verb@                ### p = 1 - exp(-exp(q))@\\
\mbox{}\verb@                ret <- exp(-exp(q))@\\
\mbox{}\verb@                if (log.p) {@\\
\mbox{}\verb@                    if (lower.tail)@\\
\mbox{}\verb@                        return(log1p(-ret))@\\
\mbox{}\verb@                    return(-exp(q))@\\
\mbox{}\verb@                }@\\
\mbox{}\verb@                if (lower.tail)@\\
\mbox{}\verb@                    return(-expm1(-exp(q)))@\\
\mbox{}\verb@                return(ret)@\\
\mbox{}\verb@            },@\\
\mbox{}\verb@            dlinkinv = function(x) @\\
\mbox{}\verb@                ifelse(is.finite(x), exp(x - exp(x)), 0.0),@\\
\mbox{}\verb@            ddlinkinv = function(x) {@\\
\mbox{}\verb@                ex <- exp(x)@\\
\mbox{}\verb@                ifelse(is.finite(x), (ex - ex^2) / exp(ex), 0.0)@\\
\mbox{}\verb@            },@\\
\mbox{}\verb@            dddlinkinv = function(x) {@\\
\mbox{}\verb@                ex <- exp(x)@\\
\mbox{}\verb@                ifelse(is.finite(x), (ex - 3*ex^2 + ex^3) / exp(ex), 0.0)@\\
\mbox{}\verb@            },@\\
\mbox{}\verb@            dd2d = function(x)@\\
\mbox{}\verb@               -expm1(x),@\\
\mbox{}\verb@            parm2PI = plogis,@\\
\mbox{}\verb@            PI2parm = qlogis,@\\
\mbox{}\verb@            parm2OVL = function(x) {@\\
\mbox{}\verb@                x <- abs(x)@\\
\mbox{}\verb@                ret <- exp(x / (exp(-x) - 1)) - exp(-x / (exp(x) - 1)) + 1 @\\
\mbox{}\verb@                ret[abs(x) < .Machine$double.eps] <- 1@\\
\mbox{}\verb@                x[] <- ret@\\
\mbox{}\verb@                return(x)@\\
\mbox{}\verb@            }@\\
\mbox{}\verb@    )@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb16}{16}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
The probit link, with $F(z) = \Phi$, leads to normal scores tests, where the
shift effect can be interpreted as a generalised version of Cohen's $d$,
that is, differences on a latent normal scale with variance one:

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap24}\raggedright\small
\NWtarget{nuweb21}{} $\langle\,${\itshape probit}\nobreak\ {\footnotesize {21}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@probit <- function()@\\
\mbox{}\verb@    linkfun(name = "Probit",@\\
\mbox{}\verb@            alias = "van der Waerden normal scores",@\\
\mbox{}\verb@            model = "latent normal shift", @\\
\mbox{}\verb@            parm = "generalised Cohen's d",@\\
\mbox{}\verb@            link = qnorm,@\\
\mbox{}\verb@            linkinv = pnorm,@\\
\mbox{}\verb@            dlinkinv = dnorm,@\\
\mbox{}\verb@            ddlinkinv = function(x) @\\
\mbox{}\verb@                ifelse(is.finite(x), -dnorm(x = x) * x, 0.0), @\\
\mbox{}\verb@            dddlinkinv = function(x) @\\
\mbox{}\verb@                ifelse(is.finite(x), dnorm(x = x) * (x^2 - 1), 0.0),@\\
\mbox{}\verb@            dd2d = function(x) -x,@\\
\mbox{}\verb@            parm2PI = function(x) pnorm(x, sd = sqrt(2)),@\\
\mbox{}\verb@            PI2parm = function(p) qnorm(p, sd = sqrt(2)),@\\
\mbox{}\verb@            parm2OVL = function(x) 2 * pnorm(-abs(x / 2))@\\
\mbox{}\verb@    )@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb16}{16}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
\chapter{Optimisation}

\cite{Harrell2024} reports on experiments with a number of optimisers for the specific
optimisation problem arising here and recommends a Newton-Raphson algorithm
leveraging the sparse matrix structure of the observed Fisher information
matrix. The following code was adopted from his \pkg{rms} package, function
\code{rms:::lrm.fit}.

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap25}\raggedright\small
\NWtarget{nuweb22}{} $\langle\,${\itshape Newton update}\nobreak\ {\footnotesize {22}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@gradthe <- gradient(theta)     # Compute the gradient vector@\\
\mbox{}\verb@hessthe <- hessian(theta)      # Compute the Hessian matrix@\\
\mbox{}\verb@@\\
\mbox{}\verb@delta <- Matrix::solve(hessthe, gradthe, tol = control$tolsolve)@\\
\mbox{}\verb@@\\
\mbox{}\verb@if (control$trace)@\\
\mbox{}\verb@    cat(iter, ': ', theta, "\n", sep = "")@\\
\mbox{}\verb@@\\
\mbox{}\verb@step_size <- 1L                # Initialize step size for step-halving@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb24}{24}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap26}\raggedright\small
\NWtarget{nuweb23a}{} $\langle\,${\itshape Newton step halving}\nobreak\ {\footnotesize {23a}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@new_theta <- theta - step_size * delta # Update parameter vector@\\
\mbox{}\verb@objnew_the <- objective(new_theta)@\\
\mbox{}\verb@@\\
\mbox{}\verb@if (control$trace)@\\
\mbox{}\verb@    cat("Old, new, old - new objective:", @\\
\mbox{}\verb@        objthe, objnew_the, objthe - objnew_the, "\n")@\\
\mbox{}\verb@@\\
\mbox{}\verb@# Objective function failed to be reduced or is infinite@\\
\mbox{}\verb@if (!is.finite(objnew_the) || (objnew_the > objthe + 1e-6)) {@\\
\mbox{}\verb@    step_size <- step_size / 2         # Reduce the step size@\\
\mbox{}\verb@@\\
\mbox{}\verb@    if (control$trace) @\\
\mbox{}\verb@        cat("Step size reduced to", step_size, "\n")@\\
\mbox{}\verb@@\\
\mbox{}\verb@    if (step_size <= control$minstepsize) {@\\
\mbox{}\verb@        msg <- paste("Step size ", step_size, @\\
\mbox{}\verb@                     " has reduced below minstepsize")@\\
\mbox{}\verb@        return(list(par = theta, objective = objthe, convergence = 1, @\\
\mbox{}\verb@                    message = msg)) @\\
\mbox{}\verb@    }@\\
\mbox{}\verb@} else {@\\
\mbox{}\verb@    theta  <- new_theta # accept the new parameter vector@\\
\mbox{}\verb@    oldobj <- objthe@\\
\mbox{}\verb@    objthe <- objnew_the@\\
\mbox{}\verb@    break@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb24}{24}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap27}\raggedright\small
\NWtarget{nuweb23b}{} $\langle\,${\itshape Newton convergence}\nobreak\ {\footnotesize {23b}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@# Convergence check - must meet 3 criteria@\\
\mbox{}\verb@if ((objthe <= oldobj + 1e-6 && (oldobj - objthe < control$objtol)) &&@\\
\mbox{}\verb@    (max(abs(gradthe)) < control$gradtol) &&@\\
\mbox{}\verb@    (max(abs(delta)) < control$paramtol))@\\
\mbox{}\verb@@\\
\mbox{}\verb@    return(list(par            = theta,@\\
\mbox{}\verb@                objective      = objthe,@\\
\mbox{}\verb@                convergence    = 0,@\\
\mbox{}\verb@                message        = "Normal convergence"))@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb24}{24}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap28}\raggedright\small
\NWtarget{nuweb24}{} $\langle\,${\itshape NewtonRaphson}\nobreak\ {\footnotesize {24}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@### adopted from rms:::lrm.fit@\\
\mbox{}\verb@.NewtonRaphson <- function(start, objective, gradient, hessian, @\\
\mbox{}\verb@                           control = list(iter.max = 150, trace = trace, @\\
\mbox{}\verb@                                          objtol = 5e-4, gradtol = 1e-5, @\\
\mbox{}\verb@                                          paramtol = 1e-5, minstepsize = 1e-2, @\\
\mbox{}\verb@                                          tolsolve = .Machine$double.eps),@\\
\mbox{}\verb@                           trace = FALSE)@\\
\mbox{}\verb@{@\\
\mbox{}\verb@@\\
\mbox{}\verb@    theta  <- start # Initialize the parameter vector@\\
\mbox{}\verb@    oldobj <- Inf@\\
\mbox{}\verb@    objthe <- objective(theta)@\\
\mbox{}\verb@    if (!is.finite(objthe)) {@\\
\mbox{}\verb@        msg <- "Infeasible starting values"@\\
\mbox{}\verb@        return(list(par = theta, objective = objthe, convergence = 1, @\\
\mbox{}\verb@                    message = msg)) @\\
\mbox{}\verb@    }@\\
\mbox{}\verb@@\\
\mbox{}\verb@    if(!suppressPackageStartupMessages(requireNamespace("Matrix")))@\\
\mbox{}\verb@        stop(gettextf("%s needs package 'Matrix' correctly installed",@\\
\mbox{}\verb@                      ".NewtonRaphson"),@\\
\mbox{}\verb@                 domain = NA)@\\
\mbox{}\verb@@\\
\mbox{}\verb@    for (iter in seq_len(control$iter.max)) {@\\
\mbox{}\verb@@\\
\mbox{}\verb@        @\hbox{$\langle\,${\itshape Newton update}\nobreak\ {\footnotesize \NWlink{nuweb22}{22}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\\
\mbox{}\verb@        # Step-halving loop@\\
\mbox{}\verb@        while (TRUE) {@\\
\mbox{}\verb@            @\hbox{$\langle\,${\itshape Newton step halving}\nobreak\ {\footnotesize \NWlink{nuweb23a}{23a}}$\,\rangle$}\verb@@\\
\mbox{}\verb@        }@\\
\mbox{}\verb@@\\
\mbox{}\verb@        @\hbox{$\langle\,${\itshape Newton convergence}\nobreak\ {\footnotesize \NWlink{nuweb23b}{23b}}$\,\rangle$}\verb@ @\\
\mbox{}\verb@    }@\\
\mbox{}\verb@@\\
\mbox{}\verb@    msg <- paste("Reached", control$iter.max, "iterations without convergence")@\\
\mbox{}\verb@    return(list(par = theta, objective = objthe, convergence = 1, message = msg)) @\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb25b}{25b}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
<<Newton, echo = FALSE>>=

@

We can now test the optimiser on a least-squares problem

<<Newton-test>>=
N <- 10000
P <- 30
X <- matrix(rnorm(N * P), ncol = P)
y <- X %*% runif(P) + rnorm(nrow(X))

f <- function(par) sum((y - X %*% par)^2)
g <- function(par) colSums(- 2 * c(y - X %*% par) * X)
h <- function(par) 2 * crossprod(X)

start <- runif(P)

cf <- .NewtonRaphson(start = start, objective = f, gradient = g, hessian = h)

cf2 <- coef(m <- lm(y ~ 0 + X))
all.equal(sum((y - fitted(m))^2), cf$objective)
all.equal(unname(cf$par), unname(cf2))
@

\chapter{ML Estimation}
\label{ch:ML}

We use two internal \pkg{stats} functions, define here
\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap29}\raggedright\small
\NWtarget{nuweb25a}{} \verb@"utils.R"@\nobreak\ {\footnotesize {25a}}$\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@### gives warnings but no diffs@\\
\mbox{}\verb@C_dpermdist2 <- stats:::C_dpermdist2@\\
\mbox{}\verb@assert_NULL_or_prob <- stats:::assert_NULL_or_prob@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
The file \file{free1way.R} goes into \file{src/library/stats/R}, so add
copyright statement here as well (such that we can simply copy the file in
case of updates).
\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap30}\raggedright\small
\NWtarget{nuweb25b}{} \verb@"free1way.R"@\nobreak\ {\footnotesize {25b}}$\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@#  File src/library/stats/R/free1way.R@\\
\mbox{}\verb@#  Part of the R package, https://www.R-project.org@\\
\mbox{}\verb@#@\\
\mbox{}\verb@#  Copyright (C) 2026 The R Core Team@\\
\mbox{}\verb@#@\\
\mbox{}\verb@#  This program is free software; you can redistribute it and/or modify@\\
\mbox{}\verb@#  it under the terms of the GNU General Public License as published by@\\
\mbox{}\verb@#  the Free Software Foundation; either version 2 of the License, or@\\
\mbox{}\verb@#  (at your option) any later version.@\\
\mbox{}\verb@#@\\
\mbox{}\verb@#  This program is distributed in the hope that it will be useful,@\\
\mbox{}\verb@#  but WITHOUT ANY WARRANTY; without even the implied warranty of@\\
\mbox{}\verb@#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the@\\
\mbox{}\verb@#  GNU General Public License for more details.@\\
\mbox{}\verb@#@\\
\mbox{}\verb@#  A copy of the GNU General Public License is available at@\\
\mbox{}\verb@#  https://www.R-project.org/Licenses/@\\
\mbox{}\verb@@\\
\mbox{}\verb@@\hbox{$\langle\,${\itshape NewtonRaphson}\nobreak\ {\footnotesize \NWlink{nuweb24}{24}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\hbox{$\langle\,${\itshape ML estimation}\nobreak\ {\footnotesize \NWlink{nuweb33}{33}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\hbox{$\langle\,${\itshape free1way generic and table method (main workhorse)}\nobreak\ {\footnotesize \NWlink{nuweb46}{46}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\hbox{$\langle\,${\itshape free1way methods}\nobreak\ {\footnotesize \NWlink{nuweb53}{53}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\hbox{$\langle\,${\itshape free1way print}\nobreak\ {\footnotesize \NWlink{nuweb54}{54}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\hbox{$\langle\,${\itshape free1way summary}\nobreak\ {\footnotesize \NWlink{nuweb55}{55}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\hbox{$\langle\,${\itshape free1way confint}\nobreak\ {\footnotesize \NWlink{nuweb59}{59}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\hbox{$\langle\,${\itshape free1way formula}\nobreak\ {\footnotesize \NWlink{nuweb49}{49}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\hbox{$\langle\,${\itshape free1way numeric}\nobreak\ {\footnotesize \NWlink{nuweb51}{51}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\hbox{$\langle\,${\itshape free1way factor}\nobreak\ {\footnotesize \NWlink{nuweb52}{52}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\hbox{$\langle\,${\itshape plot free1way}\nobreak\ {\footnotesize \NWlink{nuweb83d}{83d}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\hbox{$\langle\,${\itshape ppplot}\nobreak\ {\footnotesize \NWlink{nuweb88}{88}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\hbox{$\langle\,${\itshape rfree1way}\nobreak\ {\footnotesize \NWlink{nuweb92}{92}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\hbox{$\langle\,${\itshape power}\nobreak\ {\footnotesize \NWlink{nuweb100}{100}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
We now put together a low-level function for parameter estimation and
evaluation of scores, Hessians, and residuals. We also set-up a profile
likelihood function for later re-use. 

Assuming all shift effects been zero, we compute starting values for the
intercept parameters from the empirical cumulative distribution function
after merging all treatment groups:

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap31}\raggedright\small
\NWtarget{nuweb26}{} $\langle\,${\itshape setup and starting values}\nobreak\ {\footnotesize {26}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@@\hbox{$\langle\,${\itshape table2list body}\nobreak\ {\footnotesize \NWlink{nuweb10a}{10a}}$\,\rangle$}\verb@@\\
\mbox{}\verb@## allow specification of start = delta and fix = 1:K@\\
\mbox{}\verb@## for evaluating the likelihood at given delta parameters@\\
\mbox{}\verb@## without having to specify all intercept parameters@\\
\mbox{}\verb@if (is.null(start))@\\
\mbox{}\verb@    start <- rep.int(0, K - 1L)@\\
\mbox{}\verb@NS <- length(start) == (K - 1L)@\\
\mbox{}\verb@lwr <- rep(-Inf, times = K - 1L)@\\
\mbox{}\verb@for (b in seq_len(length(xlist))) {@\\
\mbox{}\verb@    bC <- nrow(xlist[[b]]) - 1L@\\
\mbox{}\verb@    lwr <- c(lwr, -Inf, rep.int(0, times = bC - 1L))@\\
\mbox{}\verb@    if (NS) {@\\
\mbox{}\verb@        ecdf0 <- cumsum(rowSums(xlist[[b]]))@\\
\mbox{}\verb@        ### ensure that 0 < ecdf0 < 1 such that quantiles exist@\\
\mbox{}\verb@        ecdf0 <- pmax(1, ecdf0[-length(ecdf0)]) / (max(ecdf0) + 1)@\\
\mbox{}\verb@        Qecdf <- Q(ecdf0)@\\
\mbox{}\verb@        start <- c(start, Qecdf)@\\
\mbox{}\verb@    }@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb33}{33}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
The profile negative log-likelihood can be evaluated for some of the
parameters in $\thetavec$ (denoted as \code{fix}), the remaining parameters
are updated. Note that \code{start} can either just contain a subset of the
shift parameter or must contain the full and feasible
(meeting monotonicity constraints for the intercept parameters) parameter vector
$\thetavec$.

We call \code{nlminb} and will increase the \code{eval.max} and
\code{iter.max} control parameters if
we encounter optimisation issues and restart at the current solution:

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap32}\raggedright\small
\NWtarget{nuweb27}{} $\langle\,${\itshape do optim}\nobreak\ {\footnotesize {27}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@maxit <- control[[1L]]$iter.max@\\
\mbox{}\verb@while(maxit < 10001) {@\\
\mbox{}\verb@   ret <- do.call(names(control)[[1L]], opargs)@\\
\mbox{}\verb@   maxit <- 5 * maxit@\\
\mbox{}\verb@   if (ret$convergence > 0) {@\\
\mbox{}\verb@       opargs$control$eval.max <- maxit@\\
\mbox{}\verb@       opargs$control$iter.max <- maxit@\\
\mbox{}\verb@       opargs$start <- ret$par@\\
\mbox{}\verb@   } else {@\\
\mbox{}\verb@       break()@\\
\mbox{}\verb@   }@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@\\
\mbox{}\verb@if (isTRUE(MPL_Jeffreys)) {@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape Jeffreys penalisation}\nobreak\ {\footnotesize \NWlink{nuweb30a}{30a}}$\,\rangle$}\verb@@\\
\mbox{}\verb@} else {@\\
\mbox{}\verb@    if (ret$convergence > 0) {@\\
\mbox{}\verb@        if (is.na(MPL_Jeffreys)) { ### only after failure@\\
\mbox{}\verb@            warning(gettextf("Jeffreys penalisation was applied in %s because initial optimisation failed with:",@\\
\mbox{}\verb@                             "free1way"),@\\
\mbox{}\verb@                    "\n  ", ret$message, domain = NA)@\\
\mbox{}\verb@            MPL_Jeffreys <- TRUE@\\
\mbox{}\verb@            @\hbox{$\langle\,${\itshape Jeffreys penalisation}\nobreak\ {\footnotesize \NWlink{nuweb30a}{30a}}$\,\rangle$}\verb@@\\
\mbox{}\verb@        }@\\
\mbox{}\verb@   }@\\
\mbox{}\verb@}@\\
\mbox{}\verb@if (ret$convergence > 0)@\\
\mbox{}\verb@    warning(gettextf("unsuccessful optimisation in %s", "free1way"),@\\
\mbox{}\verb@            ": ", ret$message, domain = NA)@\\
\mbox{}\verb@@\\
\mbox{}\verb@ret$MPL_Jeffreys <- MPL_Jeffreys@\\
\mbox{}\verb@ret$value <- ret$objective@\\
\mbox{}\verb@ret$objective <- NULL@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb29}{29}\NWlink{nuweb30b}{, 30b}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
We first set-up the target function (the negative log-likelihood, also
dealing with right-censoring) and the corresponding gradient. We then add
the profile negative log-likelihood, which in turn calls the two functions
defined first. We start with the log-likelihood, its gradient and Hessian

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap33}\raggedright\small
\NWtarget{nuweb28}{} $\langle\,${\itshape logLik, gradient, Hessian}\nobreak\ {\footnotesize {28}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@fn <- function(par) @\\
\mbox{}\verb@{@\\
\mbox{}\verb@    ret <- .snll(par, x = xlist, mu = mu)@\\
\mbox{}\verb@    if (!is.null(xrc))@\\
\mbox{}\verb@        ret <- ret + .snll(par, x = xrclist, mu = mu, @\\
\mbox{}\verb@                           rightcensored = TRUE)@\\
\mbox{}\verb@    return(ret)@\\
\mbox{}\verb@}@\\
\mbox{}\verb@gr <- function(par) @\\
\mbox{}\verb@{@\\
\mbox{}\verb@    ret <- .snsc(par, x = xlist, mu = mu)@\\
\mbox{}\verb@    if (!is.null(xrc))@\\
\mbox{}\verb@        ret <- ret + .snsc(par, x = xrclist, mu = mu, @\\
\mbox{}\verb@                           rightcensored = TRUE)@\\
\mbox{}\verb@    return(ret)@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@\\
\mbox{}\verb@### allocate memory for hessian@\\
\mbox{}\verb@Hess <- Matrix::Matrix(0, nrow = length(start), ncol = length(start))@\\
\mbox{}\verb@@\\
\mbox{}\verb@he <- function(par) @\\
\mbox{}\verb@{@\\
\mbox{}\verb@    if (!is.null(xrc)) {@\\
\mbox{}\verb@        ret <- .shes(par, x = xlist, mu = mu, xrc = xrclist, full = Hess, @\\
\mbox{}\verb@                     retMatrix = names(control)[1L] == ".NewtonRaphson")@\\
\mbox{}\verb@    } else {@\\
\mbox{}\verb@        ret <- .shes(par, x = xlist, mu = mu, full = Hess, @\\
\mbox{}\verb@                     retMatrix = names(control)[1L] == ".NewtonRaphson")@\\
\mbox{}\verb@    }@\\
\mbox{}\verb@    return(ret)@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb29}{29}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
and define the profile log-likelihood based on these functions

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap34}\raggedright\small
\NWtarget{nuweb29}{} $\langle\,${\itshape profile}\nobreak\ {\footnotesize {29}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@@\hbox{$\langle\,${\itshape logLik, gradient, Hessian}\nobreak\ {\footnotesize \NWlink{nuweb28}{28}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\\
\mbox{}\verb@.profile <- function(start, fix = seq_len(K - 1)) @\\
\mbox{}\verb@{@\\
\mbox{}\verb@    if (!all(fix %in% seq_len(K - 1)))@\\
\mbox{}\verb@        stop(gettextf("invalid argument '%s'", "fix"), domain = NA)@\\
\mbox{}\verb@    delta <- start[fix]@\\
\mbox{}\verb@    opargs <- list(start = start[-fix], @\\
\mbox{}\verb@                     objective = function(par) {@\\
\mbox{}\verb@                         p <- numeric(length(par) + length(fix))@\\
\mbox{}\verb@                         p[fix] <- delta@\\
\mbox{}\verb@                         p[-fix] <- par@\\
\mbox{}\verb@                         fn(p)@\\
\mbox{}\verb@                     },@\\
\mbox{}\verb@                     gradient = function(par) {@\\
\mbox{}\verb@                         p <- numeric(length(par) + length(fix))@\\
\mbox{}\verb@                         p[fix] <- delta@\\
\mbox{}\verb@                         p[-fix] <- par@\\
\mbox{}\verb@                         gr(p)[-fix]@\\
\mbox{}\verb@                     },@\\
\mbox{}\verb@                     hessian = function(par) {@\\
\mbox{}\verb@                         p <- numeric(length(par) + length(fix))@\\
\mbox{}\verb@                         p[fix] <- delta@\\
\mbox{}\verb@                         p[-fix] <- par@\\
\mbox{}\verb@                         he(p)[-fix, -fix, drop = FALSE]@\\
\mbox{}\verb@                     })@\\
\mbox{}\verb@    opargs$control <- control[[1L]]@\\
\mbox{}\verb@    MPL_Jeffreys <- FALSE ### turn off Jeffreys penalisation in .profile@\\
\mbox{}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape do optim}\nobreak\ {\footnotesize \NWlink{nuweb27}{27}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\\
\mbox{}\verb@    p <- numeric(length(start))@\\
\mbox{}\verb@    p[fix] <- delta@\\
\mbox{}\verb@    p[-fix] <- ret$par@\\
\mbox{}\verb@    ret$par <- p@\\
\mbox{}\verb@    ret@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb33}{33}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
Chapter~\ref{ch:penal} introduces a bias correction \citep{Firth1993},
essentially by adding a penalty (Jeffreys prior) term to the log-likelihood. The
\code{MPL_Jeffreys} argument triggers this bias correction via
penalisation with Jeffreys prior to by applied
when \code{TRUE}, not to be applied when \code{FALSE}, and to applied in
case the unpenalised ML estimation resulted in a convergance issue
(\code{NA}). This part is still experimental and needs more testing. It also
seems unclear if and how the Fisher information needs additional correction
and it is certainly unclear if one can proceed with permutation testing
after correcting the bias.

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap35}\raggedright\small
\NWtarget{nuweb30a}{} $\langle\,${\itshape Jeffreys penalisation}\nobreak\ {\footnotesize {30a}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@.pll_Jeffreys <- function(cf, start) @\\
\mbox{}\verb@{@\\
\mbox{}\verb@    fix <- seq_along(cf)@\\
\mbox{}\verb@    start[fix] <- cf@\\
\mbox{}\verb@    ### compute profile likelihood w/o warnings@\\
\mbox{}\verb@    ret <- suppressWarnings(.profile(start, fix = fix))@\\
\mbox{}\verb@    Hfull <- he(ret$par)@\\
\mbox{}\verb@    Hfix <- as.matrix(solve(solve(Hfull)[fix, fix]))@\\
\mbox{}\verb@    return(ret$value - @\\
\mbox{}\verb@           .5 * determinant(Hfix, logarithm = TRUE)$modulus)@\\
\mbox{}\verb@}@\\
\mbox{}\verb@if (K == 2) {@\\
\mbox{}\verb@    MLcf <- ret$par[seq_len(K - 1)]@\\
\mbox{}\verb@    Fret <- optim(MLcf, fn = .pll_Jeffreys, start = ret$par,@\\
\mbox{}\verb@                  method = "Brent", lower = MLcf - 5, @\\
\mbox{}\verb@                  upper = MLcf + 5)@\\
\mbox{}\verb@} else {@\\
\mbox{}\verb@    ### Nelder-Mead@\\
\mbox{}\verb@    Fret <- optim(ret$par[seq_len(K - 1)], fn = .pll_Jeffreys, @\\
\mbox{}\verb@                  start = ret$par)@\\
\mbox{}\verb@}@\\
\mbox{}\verb@if (Fret$convergence == 0) {@\\
\mbox{}\verb@    start <- ret$par@\\
\mbox{}\verb@    start[seq_len(K - 1)] <- Fret$par@\\
\mbox{}\verb@    ret <- .profile(start, fix = seq_len(K - 1))@\\
\mbox{}\verb@    ret$objective <- ret$value@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb27}{27}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
The heart of the function is a call to \code{nlminb}, trying to obtain
parameter estimates of $\thetavec$ by minimising the negative
log-likelihood. We allow some (or all) parameters to be fixed at some
constants, and provide a profile version of the likelihood:

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap36}\raggedright\small
\NWtarget{nuweb30b}{} $\langle\,${\itshape optim}\nobreak\ {\footnotesize {30b}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@if (!length(fix)) {@\\
\mbox{}\verb@    opargs <- list(start = start, @\\
\mbox{}\verb@                   objective = fn, @\\
\mbox{}\verb@                   gradient = gr,@\\
\mbox{}\verb@                   hessian = he)@\\
\mbox{}\verb@    opargs$control <- control[[1L]]@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape do optim}\nobreak\ {\footnotesize \NWlink{nuweb27}{27}}$\,\rangle$}\verb@@\\
\mbox{}\verb@} else if (length(fix) == length(start)) {@\\
\mbox{}\verb@    ret <- list(par = start, @\\
\mbox{}\verb@                value = fn(start))@\\
\mbox{}\verb@} else {@\\
\mbox{}\verb@    ret <- .profile(start, fix = fix)@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb33}{33}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
After parameter estimation, we evaluate negative scores, the Hessian, and
negative residuals as requested:

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap37}\raggedright\small
\NWtarget{nuweb31}{} $\langle\,${\itshape post processing}\nobreak\ {\footnotesize {31}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@if (is.null(fix) || (length(fix) == length(start)))@\\
\mbox{}\verb@    parm <- seq_len(K - 1)@\\
\mbox{}\verb@else @\\
\mbox{}\verb@    parm <- fix@\\
\mbox{}\verb@if (any(parm >= K)) return(ret)@\\
\mbox{}\verb@@\\
\mbox{}\verb@ret$coefficients <- ret$par[parm]@\\
\mbox{}\verb@dn2 <- dimnames(xt)[2L]@\\
\mbox{}\verb@names(ret$coefficients) <- cnames <- paste0(names(dn2), dn2[[1L]][1L + parm])@\\
\mbox{}\verb@@\\
\mbox{}\verb@par <- ret$par@\\
\mbox{}\verb@intercepts <- function(parm, x) @\\
\mbox{}\verb@{@\\
\mbox{}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape stratum prep}\nobreak\ {\footnotesize \NWlink{nuweb10b}{10b}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\\
\mbox{}\verb@    return(intercepts)@\\
\mbox{}\verb@}@\\
\mbox{}\verb@ret$intercepts <- intercepts(par, x = xlist)@\\
\mbox{}\verb@@\\
\mbox{}\verb@if (score) {@\\
\mbox{}\verb@    ret$negscore <- .snsc(par, x = xlist, mu = mu)[parm]@\\
\mbox{}\verb@    if (!is.null(xrc))@\\
\mbox{}\verb@        ret$negscore <- ret$negscore + .snsc(par, x = xrclist, mu = mu, @\\
\mbox{}\verb@                                             rightcensored = TRUE)[parm]@\\
\mbox{}\verb@}@\\
\mbox{}\verb@if (hessian) {@\\
\mbox{}\verb@    if (!is.null(xrc)) {@\\
\mbox{}\verb@        ret$hessian <- .shes(par, x = xlist, mu = mu, xrc = xrclist)@\\
\mbox{}\verb@    } else {@\\
\mbox{}\verb@        ret$hessian <- .shes(par, x = xlist, mu = mu)@\\
\mbox{}\verb@    }@\\
\mbox{}\verb@    ret$vcov <- solve(ret$hessian)@\\
\mbox{}\verb@    if (length(parm) != nrow(ret$hessian))@\\
\mbox{}\verb@       ret$hessian <- solve(ret$vcov <- ret$vcov[parm, parm, drop = FALSE])@\\
\mbox{}\verb@    rownames(ret$vcov) <- colnames(ret$vcov) <- rownames(ret$hessian) <-@\\
\mbox{}\verb@        colnames(ret$hessian) <-  cnames@\\
\mbox{}\verb@}@\\
\mbox{}\verb@if (residuals) {@\\
\mbox{}\verb@    ret$negresiduals <- .snsr(par, x = xlist, mu = mu)@\\
\mbox{}\verb@    if (!is.null(xrc)) {@\\
\mbox{}\verb@        rcr <- .snsr(par, x = xrclist, mu = mu, rightcensored = TRUE)@\\
\mbox{}\verb@        ret$negresiduals <- c(rbind(matrix(ret$negresiduals, nrow = C),@\\
\mbox{}\verb@                                    matrix(rcr, nrow = C)))@\\
\mbox{}\verb@     }@\\
\mbox{}\verb@}@\\
\mbox{}\verb@ret$profile <- function(start, fix)@\\
\mbox{}\verb@    .free1wayML(xt, link = link, mu = mu, start = start, fix = fix, tol = tol, @\\
\mbox{}\verb@                ...) @\\
\mbox{}\verb@ret$table <- xt@\\
\mbox{}\verb@@\\
\mbox{}\verb@ret$strata <- strata@\\
\mbox{}\verb@ret$mu <- mu@\\
\mbox{}\verb@if (length(ret$mu) == 1) {@\\
\mbox{}\verb@    names(ret$mu) <- link$parm@\\
\mbox{}\verb@} else {@\\
\mbox{}\verb@    names(ret$mu) <- c(paste(link$parm, cnames[1L], sep = ":"), cnames[-1L])@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb33}{33}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
Finally, we put everything into one function which returns an object of
class \code{free1wayML} for later use. The control parameters for
\code{.NewtonRaphson} and \code{stats::nlminb} are the ones suggested by
\cite{Harrell2024}. By default, the internal Newton-Raphson implementation
is used, we can switch to \code{stats::nlminb} by specifying \code{dooptim =
"nlminb"}. The latter option cannot handle Fisher information matrices in
form of a \code{Matrix} object and thus computing the updates takes more
time whenever a larger number of intercept parameters in present in the
problem.

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap38}\raggedright\small
\NWtarget{nuweb33}{} $\langle\,${\itshape ML estimation}\nobreak\ {\footnotesize {33}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@.free1wayML <- function(x, link, mu = 0, start = NULL, fix = NULL, @\\
\mbox{}\verb@                        residuals = TRUE, score = TRUE, hessian = TRUE, @\\
\mbox{}\verb@                        MPL_Jeffreys = FALSE,@\\
\mbox{}\verb@                        ### use nlminb for small sample sizes@\\
\mbox{}\verb@                        dooptim = c(".NewtonRaphson", "nlminb")[1 + (sum(x) < 20)],                         @\\
\mbox{}\verb@                        control = list(@\\
\mbox{}\verb@                            "nlminb" = list(trace = trace, iter.max = 200,@\\
\mbox{}\verb@                                            eval.max = 200, rel.tol = 1e-10,@\\
\mbox{}\verb@                                            abs.tol = 1e-20, xf.tol = 1e-16),@\\
\mbox{}\verb@                            ".NewtonRaphson" = list(iter.max = 200, trace = trace, @\\
\mbox{}\verb@                                             objtol = 5e-4, @\\
\mbox{}\verb@                                             gradtol = 1e-5 * sum(x) / 1000, @\\
\mbox{}\verb@                                             paramtol = 1e-5, minstepsize = 1e-2, @\\
\mbox{}\verb@                                             tolsolve = .Machine$double.eps)@\\
\mbox{}\verb@                        )[dooptim],@\\
\mbox{}\verb@                        trace = FALSE, @\\
\mbox{}\verb@                        tol = sqrt(.Machine$double.eps), ...) @\\
\mbox{}\verb@{@\\
\mbox{}\verb@@\\
\mbox{}\verb@    ### convert to three-way table@\\
\mbox{}\verb@    xt <- x@\\
\mbox{}\verb@    if (!is.table(x))@\\
\mbox{}\verb@        stop(gettextf("invalid argument '%s'", "x"), domain = NA) # 'y' in free1way ...@\\
\mbox{}\verb@    dx <- dim(x)@\\
\mbox{}\verb@    dn <- dimnames(x)@\\
\mbox{}\verb@    if (length(dx) == 2L) {@\\
\mbox{}\verb@        x <- as.table(array(c(x), dim = dx <- c(dx, 1L)))@\\
\mbox{}\verb@        dimnames(x) <- dn <- c(dn, list(A = "A"))@\\
\mbox{}\verb@    }@\\
\mbox{}\verb@@\\
\mbox{}\verb@    ### short-cuts for link functions@\\
\mbox{}\verb@    F <- function(q) .p(link, q = q)@\\
\mbox{}\verb@    Q <- function(p) .q(link, p = p)@\\
\mbox{}\verb@    f <- function(q) .d(link, x = q)@\\
\mbox{}\verb@    fp <- function(q) .dd(link, x = q)@\\
\mbox{}\verb@@\\
\mbox{}\verb@    if(!suppressPackageStartupMessages(requireNamespace("Matrix")))@\\
\mbox{}\verb@        stop(gettextf("%s needs package 'Matrix' correctly installed",@\\
\mbox{}\verb@                      ".free1wayML"),@\\
\mbox{}\verb@                 domain = NA)@\\
\mbox{}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape setup and starting values}\nobreak\ {\footnotesize \NWlink{nuweb26}{26}}$\,\rangle$}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape negative logLik}\nobreak\ {\footnotesize \NWlink{nuweb3b}{3b}}$\,\rangle$}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape negative score}\nobreak\ {\footnotesize \NWlink{nuweb4b}{4b}}$\,\rangle$}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape negative score residuals}\nobreak\ {\footnotesize \NWlink{nuweb4c}{4c}}$\,\rangle$}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape Hessian}\nobreak\ {\footnotesize \NWlink{nuweb7}{7}}$\,\rangle$}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape stratified negative logLik}\nobreak\ {\footnotesize \NWlink{nuweb11a}{11a}}$\,\rangle$}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape stratified negative score}\nobreak\ {\footnotesize \NWlink{nuweb11b}{11b}}$\,\rangle$}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape stratified Hessian}\nobreak\ {\footnotesize \NWlink{nuweb14}{14}}$\,\rangle$}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape stratified negative score residual}\nobreak\ {\footnotesize \NWlink{nuweb11c}{11c}}$\,\rangle$}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape profile}\nobreak\ {\footnotesize \NWlink{nuweb29}{29}}$\,\rangle$}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape optim}\nobreak\ {\footnotesize \NWlink{nuweb30b}{30b}}$\,\rangle$}\verb@ @\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape post processing}\nobreak\ {\footnotesize \NWlink{nuweb31}{31}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\\
\mbox{}\verb@    class(ret) <- "free1wayML"@\\
\mbox{}\verb@    ret@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb25b}{25b}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
As an example, consider a stratified (two stata) $3 \times 3$ problem where
outcome category B is missing from the second stratum:

<<workhorse>>=
N <- 10
a <- matrix(c(5, 6, 4,
                    3, 5, 7,
                    3, 4, 5,
                    3, 5, 6,
                    0, 0, 0,
                    4, 6, 5), ncol = 3, byrow = TRUE)
x <- as.table(array(c(a[1:3,], a[-(1:3),]), dim = c(3, 3, 2)))
x
ret <- .free1wayML(x, logit())
ret[c("value", "par")]
cf <- ret$par
cf[1:2] <- cf[1:2] + .5
### new2old parameterisation
c(cf[1:2], cf[3], log(cf[4] - cf[3]), cf[5])
### profile for cf[1:2]
.free1wayML(x, logit(), start = cf, fix = 1:2)[c("value", "par")]
### profile for cf[2]
.free1wayML(x, logit(), start = cf, fix = 2)[c("value", "par")]
### evaluate log-likelihood at cf
.free1wayML(x, logit(), start = cf, 
            fix = seq_along(ret$par))[c("value", "par")]
@

\chapter{ML Inference}
\label{ch:MLinf}

Based on an object of class \code{free1wayML}, we can setup different test
statistics and obtain the limiting null distribution based on classical ML
theory under the population model:

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap39}\raggedright\small
\NWtarget{nuweb36a}{} $\langle\,${\itshape statistics}\nobreak\ {\footnotesize {36a}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@if (test == "Wald") {@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape Wald statistic}\nobreak\ {\footnotesize \NWlink{nuweb36b}{36b}}$\,\rangle$}\verb@@\\
\mbox{}\verb@} else if (test == "LRT") {@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape LRT}\nobreak\ {\footnotesize \NWlink{nuweb37a}{37a}}$\,\rangle$}\verb@@\\
\mbox{}\verb@} else if (test == "Rao") {@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape Rao}\nobreak\ {\footnotesize \NWlink{nuweb37b}{37b}}$\,\rangle$}\verb@@\\
\mbox{}\verb@} else if (test == "Permutation") {@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape Permutation statistics}\nobreak\ {\footnotesize \NWlink{nuweb38}{38}}$\,\rangle$}\verb@@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb54}{54}\NWlink{nuweb59}{, 59}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
\section{Wald Statistics}

We only need access to the parameter estimates $\hat{\delta}_2, \dots,
\hat{\delta}_K$ and the corresponding Hessian:

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap40}\raggedright\small
\NWtarget{nuweb36b}{} $\langle\,${\itshape Wald statistic}\nobreak\ {\footnotesize {36b}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@if (alternative == "two.sided") {@\\
\mbox{}\verb@    STATISTIC <- c("Wald chi-squared" = @\\
\mbox{}\verb@                   c(crossprod(cf, x$hessian %*% cf)))@\\
\mbox{}\verb@    DF <- c("df" = length(parm))@\\
\mbox{}\verb@    PVAL <- pchisq(STATISTIC, df = DF, lower.tail = FALSE)@\\
\mbox{}\verb@} else {@\\
\mbox{}\verb@    STATISTIC <- c("Wald Z" = unname(c(cf * sqrt(c(x$hessian)))))@\\
\mbox{}\verb@    PVAL <- pnorm(STATISTIC, lower.tail = alternative == "less")@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb36a}{36a}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
\section{Likelihood-ratio Statistics}

In addition to the log-likelihood evaluated at the ML estimates, we need to
evaluate the profile log-likelihood at some value corresponding the null
hypothesis to be tested:

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap41}\raggedright\small
\NWtarget{nuweb37a}{} $\langle\,${\itshape LRT}\nobreak\ {\footnotesize {37a}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@par <- x$par@\\
\mbox{}\verb@par[parm] <- value@\\
\mbox{}\verb@unll <- x$value ### neg logLik@\\
\mbox{}\verb@rnll <- x$profile(par, parm)$value ### neg logLik@\\
\mbox{}\verb@STATISTIC <- c("logLR chi-squared" = - 2 * (unll - rnll))@\\
\mbox{}\verb@DF <- c("df" = length(parm))@\\
\mbox{}\verb@PVAL <- pchisq(STATISTIC, df = DF, lower.tail = FALSE)@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb36a}{36a}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
\section{Rao Score Statistics}

For the Rao score test, the inverse of the Hessian as well as the score
function of the shift parameters evaluated for some null values need to be
computed:

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap42}\raggedright\small
\NWtarget{nuweb37b}{} $\langle\,${\itshape Rao}\nobreak\ {\footnotesize {37b}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@par <- x$par@\\
\mbox{}\verb@par[parm] <- value@\\
\mbox{}\verb@ret <- x$profile(par, parm)@\\
\mbox{}\verb@if (alternative == "two.sided") {@\\
\mbox{}\verb@    STATISTIC <- c("Rao chi-squared" = c(crossprod(ret$negscore, @\\
\mbox{}\verb@                                                   ret$vcov %*%@\\
\mbox{}\verb@                                                   ret$negscore)))@\\
\mbox{}\verb@    DF <- c("df" = length(parm))@\\
\mbox{}\verb@    PVAL <- pchisq(STATISTIC, df = DF, lower.tail = FALSE)@\\
\mbox{}\verb@} else {@\\
\mbox{}\verb@    STATISTIC <- c("Rao Z" = unname(- ret$negscore * @\\
\mbox{}\verb@                                      sqrt(c(ret$vcov))))@\\
\mbox{}\verb@    PVAL <- pnorm(STATISTIC, lower.tail = alternative == "less")@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb36a}{36a}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
\chapter{Permutation Inference}
\label{ch:Perminf}

Under the permutation model, that is, in randomised experiments where the
random treatment allocation is the only relevant source of randomness, we
compute a permutation variant of the Rao score test, based on the conditional
asymptotic distribution or based on a Monte-Carlo estimate of the reference
distribution:

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap43}\raggedright\small
\NWtarget{nuweb38}{} $\langle\,${\itshape Permutation statistics}\nobreak\ {\footnotesize {38}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@par <- x$par@\\
\mbox{}\verb@par[parm] <- value@\\
\mbox{}\verb@ret <- x$profile(par, parm)@\\
\mbox{}\verb@sc <- - ret$negscore@\\
\mbox{}\verb@if (length(cf) == 1L)@\\
\mbox{}\verb@    sc <- sc / sqrt(c(ret$hessian))@\\
\mbox{}\verb@if (!is.null(x$exact)) {@\\
\mbox{}\verb@    STATISTIC = c("W" = sc)@\\
\mbox{}\verb@} else {@\\
\mbox{}\verb@    Esc <- sc - x$perm$Expectation@\\
\mbox{}\verb@@\\
\mbox{}\verb@    if (alternative == "two.sided" && length(cf) > 1L) {@\\
\mbox{}\verb@        STATISTIC <- c("Perm chi-squared" = @\\
\mbox{}\verb@                       sum(Esc * solve(x$perm$Covariance, Esc)))@\\
\mbox{}\verb@    } else {@\\
\mbox{}\verb@        STATISTIC <- c("Perm Z" = Esc / sqrt(c(x$perm$Covariance)))@\\
\mbox{}\verb@    }@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb36a}{36a}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
In addition, we compute permutation $p$-values

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap44}\raggedright\small
\NWtarget{nuweb39}{} $\langle\,${\itshape Permutation p-values}\nobreak\ {\footnotesize {39}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@if (!is.null(x$exact)) {@\\
\mbox{}\verb@    PVAL <- switch(alternative,@\\
\mbox{}\verb@                   "two.sided" = 2 * min(c(x$exact$ple(sc), @\\
\mbox{}\verb@                                           x$exact$pgr(sc))),@\\
\mbox{}\verb@                   "less" = x$exact$ple(sc),@\\
\mbox{}\verb@                   "greater" = x$exact$pgr(sc))@\\
\mbox{}\verb@} else {@\\
\mbox{}\verb@    .pm <- function(x) sum(x) / length(x) @\\
\mbox{}\verb@    ps <- x$perm$permStat@\\
\mbox{}\verb@@\\
\mbox{}\verb@    .GE <- function(x, y)@\\
\mbox{}\verb@        (y - x) <= sqrt(.Machine$double.eps)@\\
\mbox{}\verb@@\\
\mbox{}\verb@    .LE <- function(x, y)@\\
\mbox{}\verb@        (x - y) <= sqrt(.Machine$double.eps)@\\
\mbox{}\verb@@\\
\mbox{}\verb@    if (alternative == "two.sided" && length(cf) > 1L) {@\\
\mbox{}\verb@        if (!is.null(ps)) {@\\
\mbox{}\verb@            PVAL <- .pm(.GE(ps, STATISTIC))@\\
\mbox{}\verb@        } else {@\\
\mbox{}\verb@            DF <- c("df" = x$perm$DF)@\\
\mbox{}\verb@            PVAL <- pchisq(STATISTIC, df = DF, lower.tail = FALSE)@\\
\mbox{}\verb@        }@\\
\mbox{}\verb@    } else {@\\
\mbox{}\verb@        if (!is.null(ps)) {@\\
\mbox{}\verb@            PVALle <- .pm(.LE(ps, STATISTIC))@\\
\mbox{}\verb@            PVALge <- .pm(.GE(ps, STATISTIC))@\\
\mbox{}\verb@            if (alternative == "two.sided")@\\
\mbox{}\verb@                PVAL <- 2 * min(c(PVALle, PVALge))@\\
\mbox{}\verb@            else if (alternative == "less")@\\
\mbox{}\verb@                PVAL <- PVALle@\\
\mbox{}\verb@            else@\\
\mbox{}\verb@                PVAL <- PVALge@\\
\mbox{}\verb@        } else {@\\
\mbox{}\verb@            if (alternative == "two.sided")@\\
\mbox{}\verb@                PVAL <- pchisq(STATISTIC^2, df = 1, lower.tail = FALSE)@\\
\mbox{}\verb@            else@\\
\mbox{}\verb@                PVAL <- pnorm(STATISTIC, @\\
\mbox{}\verb@                              lower.tail = alternative == "less")@\\
\mbox{}\verb@        }@\\
\mbox{}\verb@    }@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb54}{54}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
The mean and variance of the linear permutation statistic under the null was
given by \cite{strasserweber1999}:

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap45}\raggedright\small
\NWtarget{nuweb40}{} $\langle\,${\itshape Strasser Weber}\nobreak\ {\footnotesize {40}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@.SW <- function(res, xt) @\\
\mbox{}\verb@{@\\
\mbox{}\verb@@\\
\mbox{}\verb@    if (length(dim(xt)) == 3L) {@\\
\mbox{}\verb@        res <- matrix(res, nrow = dim(xt)[1L], ncol = dim(xt)[3])@\\
\mbox{}\verb@        STAT <-  Exp <- Cov <- 0@\\
\mbox{}\verb@        for (b in seq_len(dim(xt)[3L])) {@\\
\mbox{}\verb@            sw <- .SW(res[,b, drop = TRUE], xt[,,b, drop = TRUE])@\\
\mbox{}\verb@            STAT <- STAT + sw$Statistic@\\
\mbox{}\verb@            Exp <- Exp + sw$Expectation@\\
\mbox{}\verb@            Cov <- Cov + sw$Covariance@\\
\mbox{}\verb@        }@\\
\mbox{}\verb@        return(list(Statistic = STAT, Expectation = as.vector(Exp),@\\
\mbox{}\verb@                    Covariance = Cov))@\\
\mbox{}\verb@    }@\\
\mbox{}\verb@@\\
\mbox{}\verb@    Y <- matrix(res, ncol = 1, nrow = length(xt))@\\
\mbox{}\verb@    weights <- c(xt)@\\
\mbox{}\verb@    x <- gl(ncol(xt), nrow(xt))@\\
\mbox{}\verb@    X <- model.matrix(~ x, data = data.frame(x = x))[,-1L,drop = FALSE]@\\
\mbox{}\verb@@\\
\mbox{}\verb@    w. <- sum(weights)@\\
\mbox{}\verb@    wX <- weights * X@\\
\mbox{}\verb@    wY <- weights * Y@\\
\mbox{}\verb@    ExpX <- colSums(wX)@\\
\mbox{}\verb@    ExpY <- colSums(wY) / w.@\\
\mbox{}\verb@    CovX <- crossprod(X, wX)@\\
\mbox{}\verb@    Yc <- t(t(Y) - ExpY)@\\
\mbox{}\verb@    CovY <- crossprod(Yc, weights * Yc) / w.@\\
\mbox{}\verb@    Exp <- kronecker(ExpY, ExpX)@\\
\mbox{}\verb@    Cov <- w. / (w. - 1) * kronecker(CovY, CovX) -@\\
\mbox{}\verb@           1 / (w. - 1) * kronecker(CovY, tcrossprod(ExpX))@\\
\mbox{}\verb@    STAT <- crossprod(X, wY)@\\
\mbox{}\verb@    list(Statistic = STAT, Expectation = as.vector(Exp),@\\
\mbox{}\verb@         Covariance = Cov)@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb47}{47}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
For small samples, we used the \code{r2dtable} function to sample from
tables with fixed marginal distributions:

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap46}\raggedright\small
\NWtarget{nuweb41}{} $\langle\,${\itshape resampling}\nobreak\ {\footnotesize {41}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@.resample <- function(res, xt, B = 10000) @\\
\mbox{}\verb@{@\\
\mbox{}\verb@@\\
\mbox{}\verb@    if (length(dim(xt)) == 2L)@\\
\mbox{}\verb@        xt <- as.table(array(xt, dim = c(dim(xt), 1)))@\\
\mbox{}\verb@@\\
\mbox{}\verb@    res <- matrix(res, nrow = dim(xt)[1L], ncol = dim(xt)[3L])@\\
\mbox{}\verb@    stat <- 0@\\
\mbox{}\verb@    ret <- .SW(res, xt)@\\
\mbox{}\verb@    if (dim(xt)[2L] == 2L) {@\\
\mbox{}\verb@        ret$testStat <- c((ret$Statistic - ret$Expectation) / @\\
\mbox{}\verb@                          sqrt(c(ret$Covariance)))@\\
\mbox{}\verb@    } else {@\\
\mbox{}\verb@        ES <- ret$Statistic - ret$Expectation@\\
\mbox{}\verb@        ret$testStat <- sum(ES * solve(ret$Covariance, ES))@\\
\mbox{}\verb@    }@\\
\mbox{}\verb@    ret$DF <- dim(xt)[2L] - 1L@\\
\mbox{}\verb@@\\
\mbox{}\verb@    if (B) {@\\
\mbox{}\verb@        for (j in 1:dim(xt)[3L]) {@\\
\mbox{}\verb@           rt <- r2dtable(B, r = rowSums(xt[,,j]), c = colSums(xt[,,j]))@\\
\mbox{}\verb@           stat <- stat + vapply(rt, @\\
\mbox{}\verb@               function(x) .colSums(x[,-1L, drop = FALSE] * res[,j], @\\
\mbox{}\verb@                                    m = nrow(x), n = ncol(x) - 1L), @\\
\mbox{}\verb@                                 FUN.VALUE = rep(0, dim(xt)[[2L]] - 1L))@\\
\mbox{}\verb@        }@\\
\mbox{}\verb@        if (dim(xt)[2L] == 2L) {@\\
\mbox{}\verb@             ret$permStat <- (stat - ret$Expectation) / @\\
\mbox{}\verb@                              sqrt(c(ret$Covariance))@\\
\mbox{}\verb@        } else {@\\
\mbox{}\verb@            ES <- matrix(stat, ncol = B) - ret$Expectation@\\
\mbox{}\verb@            ret$permStat <- .colSums(ES * solve(ret$Covariance, ES), @\\
\mbox{}\verb@                                     m = dim(xt)[[2L]] - 1L, n = B)@\\
\mbox{}\verb@        }@\\
\mbox{}\verb@    }@\\
\mbox{}\verb@    ret@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb47}{47}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
For the special case of the unstratified Wilcoxon two-sample test, we can
also provide exact $p$-values computed via the Streitberg-R\"ohmel shift
algorithm, mainly because the scores can be mapped to integers:

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap47}\raggedright\small
\NWtarget{nuweb42}{} $\langle\,${\itshape exact proportional odds}\nobreak\ {\footnotesize {42}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@.exact <- function(z, grp, w = rep.int(1, length(z))) @\\
\mbox{}\verb@{@\\
\mbox{}\verb@@\\
\mbox{}\verb@    z <- rep(z, times = w)@\\
\mbox{}\verb@    grp <- rep(grp, times = w)@\\
\mbox{}\verb@    x <- rank(z)@\\
\mbox{}\verb@    f <- 2 - all(x == floor(x))@\\
\mbox{}\verb@    x <- as.integer(x * f)@\\
\mbox{}\verb@    x <- x - min(x) + 1L@\\
\mbox{}\verb@    sx <- sort(x)@\\
\mbox{}\verb@@\\
\mbox{}\verb@    m <- as.integer(sum(grp > 0))@\\
\mbox{}\verb@    stopifnot(m > 1)@\\
\mbox{}\verb@    stopifnot(m < length(x))@\\
\mbox{}\verb@@\\
\mbox{}\verb@    d <- .Call(stats:::C_dpermdist2, sx, m)@\\
\mbox{}\verb@    s <- seq.int(from = 1L, to = sum(rev(sx)[seq_len(m)]), by = 1L)@\\
\mbox{}\verb@@\\
\mbox{}\verb@    STATISTIC <- sum(x[grp > 0])@\\
\mbox{}\verb@    F <- cumsum(d)@\\
\mbox{}\verb@    S <- rev(cumsum(rev(d)))@\\
\mbox{}\verb@    cf <- lm.fit(x = cbind(1, x), y = as.double(z))$coefficients@\\
\mbox{}\verb@@\\
\mbox{}\verb@    z2x <- function(z) round((z - m * cf[1]) / cf[2])@\\
\mbox{}\verb@@\\
\mbox{}\verb@    c(ple = function(z) sum(d[s <= z2x(z)]), # s and STATISTIC are integers@\\
\mbox{}\verb@      pgr = function(z) sum(d[s >= z2x(z)]), @\\
\mbox{}\verb@      qle = function(q) c(m, max(s[F < q + 1e-08])) %*% cf,@\\
\mbox{}\verb@      qgr = function(q) c(m, min(s[S < q + 1e-08])) %*% cf)@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb47}{47}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
As an example, consider the Wilcoxon rank sum test, where the scores under
the null are a linear function of the ranks of the data. We compute the
asymptotic and approximated reference distribution and corresponding
$p$-values for a test statistics in quadratic form:

<<SW>>=
set.seed(29)
w <- gl(2, 15)
(s <- .SW(r <- rank(u <- runif(length(w))), model.matrix(~ 0 + w)))
ps <- .resample(r, model.matrix(~ 0 + w), B = 100000)
ps$testStat^2
mean(abs(ps$permStat) > abs(ps$testStat) - .Machine$double.eps)
pchisq(ps$testStat^ifelse(ps$DF == 1, 2, 1), df = ps$DF, lower.tail = FALSE)
### exactly the same
kruskal.test(u ~ w)
library("coin")
### almost the same
kruskal_test(u ~ w, distribution = approximate(100000))
@
and the exact versions are
<<Wexact>>=
wilcox_test(u ~ w, distribution = "exact")
free1way(u ~ w, exact = TRUE)
@

<<Wexact-le>>=
wilcox_test(u ~ w, distribution = "exact", alternative = "less")
print(free1way(u ~ w, exact = TRUE), alternative = "greater")
@

<<Wexact-gr>>=
wilcox_test(u ~ w, distribution = "exact", alternative = "greater")
print(free1way(u ~ w, exact = TRUE), alternative = "less")
@


<TH>Ordered alternatives: Use contrast based tests in multcomp</TH>

\chapter{Distribution-free Tests in Stratified $K$-sample Oneway Layouts}

\section{\code{free1way}}

We provide a new test procedure in a generic \code{free1way}, featuring
a method for tables (the main workhorse) and additional user interfaces. 

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap48}\raggedright\small
\NWtarget{nuweb45}{} $\langle\,${\itshape link2fun}\nobreak\ {\footnotesize {45}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@if (!inherits(link, "linkfun")) {@\\
\mbox{}\verb@    link <- match.arg(link)@\\
\mbox{}\verb@    link <- do.call(link, list())@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb46}{46}\NWlink{nuweb92}{, 92}\NWlink{nuweb95}{, 95}\NWlink{nuweb97a}{, 97a}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
We use the positive residuals for defining a permutation test with treatment
effect coding using the first group as control, that is, the test statistic
is defined through the sum of the positive residuals in all but the control
group. Unfortunately, most \code{stats::*.test} procedures use the second
group as control, so factors need to be releveled to obtain identical
results (this is relevant for the one-sided case).

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap49}\raggedright\small
\NWtarget{nuweb46}{} $\langle\,${\itshape free1way generic and table method (main workhorse)}\nobreak\ {\footnotesize {46}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@free1way <- function(y, ...)@\\
\mbox{}\verb@    UseMethod("free1way")@\\
\mbox{}\verb@@\\
\mbox{}\verb@free1way.table <- function(y, link = c("logit", "probit", "cloglog", "loglog"), @\\
\mbox{}\verb@                           mu = 0, B = 0, exact = FALSE, ...)@\\
\mbox{}\verb@{@\\
\mbox{}\verb@@\\
\mbox{}\verb@    cl <- match.call()@\\
\mbox{}\verb@@\\
\mbox{}\verb@    d <- dim(y)@\\
\mbox{}\verb@    dn <- dimnames(y)@\\
\mbox{}\verb@    DNAME <- NULL@\\
\mbox{}\verb@    if (!is.null(dn)) {@\\
\mbox{}\verb@        DNAME <- paste(names(dn)[1], "by", names(dn)[2], @\\
\mbox{}\verb@                       paste0("(", paste0(dn[2], collapse = ", "), ")"))@\\
\mbox{}\verb@        if (length(dn) == 3L)@\\
\mbox{}\verb@            DNAME <- paste(DNAME, "\n\t stratified by", names(dn)[3])@\\
\mbox{}\verb@    }@\\
\mbox{}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape link2fun}\nobreak\ {\footnotesize \NWlink{nuweb45}{45}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\\
\mbox{}\verb@    if (!(length(mu) == 1L || length(mu) == d[2L] - 1L)) {@\\
\mbox{}\verb@        warning(gettextf("incompatible length of argument 'mu' in %s",@\\
\mbox{}\verb@                         "free1way"),@\\
\mbox{}\verb@                domain = NA)@\\
\mbox{}\verb@        mu <- rep(mu, length.out = d[2L] - 1L)@\\
\mbox{}\verb@    }@\\
\mbox{}\verb@@\\
\mbox{}\verb@    ret <- .free1wayML(y, link = link, mu = mu, ...)@\\
\mbox{}\verb@    ret$link <- link@\\
\mbox{}\verb@    ret$data.name <- DNAME@\\
\mbox{}\verb@    ret$call <- cl@\\
\mbox{}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape free1way permutation tests}\nobreak\ {\footnotesize \NWlink{nuweb47}{47}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\\
\mbox{}\verb@    if (ret$MPL_Jeffreys) @\\
\mbox{}\verb@        ret$method <- paste(ret$method, @\\
\mbox{}\verb@            "with Jeffreys prior penalisation", sep = ", ")@\\
\mbox{}\verb@@\\
\mbox{}\verb@    class(ret) <- "free1way"@\\
\mbox{}\verb@    return(ret)@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb25b}{25b}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
where preparations for permutations tests are performed before returning the
object

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap50}\raggedright\small
\NWtarget{nuweb47}{} $\langle\,${\itshape free1way permutation tests}\nobreak\ {\footnotesize {47}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@alias <- link$alias@\\
\mbox{}\verb@if (length(link$alias) == 2L) alias <- alias[1L + (d[2] > 2L)]@\\
\mbox{}\verb@stratified <- FALSE@\\
\mbox{}\verb@if (length(d) == 3L) stratified <- d[3L] > 1@\\
\mbox{}\verb@ret$method <- paste(ifelse(stratified, "Stratified", ""), @\\
\mbox{}\verb@                    paste0(d[2L], "-sample"), alias, @\\
\mbox{}\verb@                    "test against", link$model, "alternatives")@\\
\mbox{}\verb@@\\
\mbox{}\verb@cf <- ret$par@\\
\mbox{}\verb@### compute the permutation distribution always@\\
\mbox{}\verb@### for H0: delta = 0, not delta = mu@\\
\mbox{}\verb@### otherwise, permutation confidence intervals@\\
\mbox{}\verb@### are not aligned with permutation p-values@\\
\mbox{}\verb@cf[idx <- seq_len(d[2L] - 1L)] <- -mu@\\
\mbox{}\verb@pr <- ret$profile(cf, idx)@\\
\mbox{}\verb@res <- - pr$negresiduals@\\
\mbox{}\verb@if (d[2L] == 2L)@\\
\mbox{}\verb@    res <- res / sqrt(c(pr$hessian))@\\
\mbox{}\verb@@\\
\mbox{}\verb@@\hbox{$\langle\,${\itshape Strasser Weber}\nobreak\ {\footnotesize \NWlink{nuweb40}{40}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\hbox{$\langle\,${\itshape resampling}\nobreak\ {\footnotesize \NWlink{nuweb41}{41}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\\
\mbox{}\verb@if (length(dim(y)) == 3L) y <- y[,,ret$strata, drop = FALSE]@\\
\mbox{}\verb@if (length(dim(y)) == 4L) {@\\
\mbox{}\verb@    y <- y[,,ret$strata,, drop = FALSE]@\\
\mbox{}\verb@    dy <- dim(y)@\\
\mbox{}\verb@    dy[1] <- dy[1] * 2@\\
\mbox{}\verb@    y <- apply(y, 3, function(x) rbind(x[,,"TRUE"], x[,,"FALSE"]), simplify = FALSE)@\\
\mbox{}\verb@    y <- array(unlist(y), dim = dy[1:3])@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@\\
\mbox{}\verb@### exact two-sample Wilcoxon w/o stratification@\\
\mbox{}\verb@if (exact) {@\\
\mbox{}\verb@    if (!stratified && link$model == "proportional odds" && d[2L] == 2L) {@\\
\mbox{}\verb@        @\hbox{$\langle\,${\itshape exact proportional odds}\nobreak\ {\footnotesize \NWlink{nuweb42}{42}}$\,\rangle$}\verb@@\\
\mbox{}\verb@        ret$exact <- .exact(c(res, res), grp = unclass(gl(2, d[1L])) - 1L,@\\
\mbox{}\verb@                            w = c(y))@\\
\mbox{}\verb@        B <- 0@\\
\mbox{}\verb@    } else {@\\
\mbox{}\verb@        warning(gettextf("cannot compute exact permutation distribution in %s",@\\
\mbox{}\verb@                         "free1way"),@\\
\mbox{}\verb@                domain = NA)@\\
\mbox{}\verb@    }@\\
\mbox{}\verb@} @\\
\mbox{}\verb@ret$perm <- .resample(res, y, B = B)@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb46}{46}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
The \code{formula} method allows formulae 
<<formula, eval = FALSE>>=
y ~ groups | blocks
@
for model specification. We start handling the formula

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap51}\raggedright\small
\NWtarget{nuweb48}{} $\langle\,${\itshape formula business}\nobreak\ {\footnotesize {48}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@if(missing(formula) || (length(formula) != 3L))@\\
\mbox{}\verb@    stop("'formula' missing or incorrect")@\\
\mbox{}\verb@@\\
\mbox{}\verb@if (stratum <- (length(formula[[3L]]) > 1)) {@\\
\mbox{}\verb@  if ((length(formula[[3L]]) != 3L) || @\\
\mbox{}\verb@      (formula[[3L]][[1L]] != as.name("|")) || @\\
\mbox{}\verb@      (length(formula[[3L]][[2L]]) !=  1L) || @\\
\mbox{}\verb@      (length(formula[[3L]][[3L]]) != 1L)) @\\
\mbox{}\verb@      stop(gettextf("incorrect specification for '%s'", "formula"),@\\
\mbox{}\verb@           domain = NA)@\\
\mbox{}\verb@  formula[[3L]][[1L]] <- as.name("+")@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@\\
\mbox{}\verb@formula <- terms(formula)@\\
\mbox{}\verb@if (length(attr(formula, "term.labels")) > 1L + stratum)@\\
\mbox{}\verb@   stop("'formula' missing or incorrect")@\\
\mbox{}\verb@group <- attr(formula, "term.labels")[1L]@\\
\mbox{}\verb@@\\
\mbox{}\verb@m <- match.call(expand.dots = FALSE)@\\
\mbox{}\verb@m$formula <- formula@\\
\mbox{}\verb@if (is.matrix(eval(m$data, parent.frame())))@\\
\mbox{}\verb@    m$data <- as.data.frame(data)@\\
\mbox{}\verb@## need stats:: for non-standard evaluation@\\
\mbox{}\verb@m[[1L]] <- quote(stats::model.frame)@\\
\mbox{}\verb@m$... <- NULL@\\
\mbox{}\verb@mf <- eval(m, parent.frame())@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb49}{49}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap52}\raggedright\small
\NWtarget{nuweb49}{} $\langle\,${\itshape free1way formula}\nobreak\ {\footnotesize {49}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@free1way.formula <- function(formula, data, weights, subset, na.action = na.pass, @\\
\mbox{}\verb@                             event = NULL, ...)@\\
\mbox{}\verb@{@\\
\mbox{}\verb@    cl <- match.call()@\\
\mbox{}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape formula business}\nobreak\ {\footnotesize \NWlink{nuweb48}{48}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\\
\mbox{}\verb@    response <- attr(attr(mf, "terms"), "response")@\\
\mbox{}\verb@    DNAME <- paste(vn <- c(names(mf)[response], group), @\\
\mbox{}\verb@                   collapse = " by ") # works in all cases@\\
\mbox{}\verb@    w <- as.vector(model.weights(mf))@\\
\mbox{}\verb@    y <- mf[[response]]@\\
\mbox{}\verb@    if (inherits(y, "Surv")) {@\\
\mbox{}\verb@        if (!is.null(event))@\\
\mbox{}\verb@            stop(gettextf("cannot have both a 'Surv()' response and an 'event' argument in %s",@\\
\mbox{}\verb@                          "free1way"),@\\
\mbox{}\verb@                 domain = NA)@\\
\mbox{}\verb@        if (attr(y, "type") != "right")@\\
\mbox{}\verb@            stop(gettextf("%s currently only allows independent right-censoring",@\\
\mbox{}\verb@                          "free1way"),@\\
\mbox{}\verb@                 domain = NA)@\\
\mbox{}\verb@        event <- (y[,2] > 0)@\\
\mbox{}\verb@        y <- y[,1]@\\
\mbox{}\verb@    }@\\
\mbox{}\verb@    g <- factor(mf[[group]])@\\
\mbox{}\verb@    mf[[group]] <- g@\\
\mbox{}\verb@    lev <- levels(g)@\\
\mbox{}\verb@    DNAME <- paste(DNAME, paste0("(", paste0(lev, collapse = ", "), ")"))@\\
\mbox{}\verb@    if (nlevels(g) < 2L)@\\
\mbox{}\verb@        stop(gettextf("incorrect argument 'groups' in %s: at least two groups needed",@\\
\mbox{}\verb@                      "free1way"),@\\
\mbox{}\verb@             domain = NA)@\\
\mbox{}\verb@    if (stratum) {@\\
\mbox{}\verb@        st <- factor(mf[[3L]])@\\
\mbox{}\verb@        mf[[3L]] <- st@\\
\mbox{}\verb@        ### nlevels(st) == 1L is explicitly allowed@\\
\mbox{}\verb@        vn <- c(vn, names(mf)[3L])@\\
\mbox{}\verb@        RVAL <- free1way(y = y, groups = g, blocks = st, event = event, @\\
\mbox{}\verb@                         weights = w, varnames = vn, ...)@\\
\mbox{}\verb@        DNAME <- paste(DNAME, paste("\n\t stratified by", names(mf)[3L]))@\\
\mbox{}\verb@    } else {@\\
\mbox{}\verb@        ## Call the corresponding method@\\
\mbox{}\verb@        RVAL <- free1way(y = y, groups = g, event = event, weights = w, @\\
\mbox{}\verb@                         varnames = vn, ...)@\\
\mbox{}\verb@    }@\\
\mbox{}\verb@    RVAL$data <- mf@\\
\mbox{}\verb@    RVAL$data.name <- DNAME@\\
\mbox{}\verb@    RVAL$call <- cl@\\
\mbox{}\verb@    RVAL@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb25b}{25b}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
The method for numeric outcomes provides a discretisation at the unique
observed outcome values, or (for very large sample sizes), for binned
outcomes. The \code{event} argument is a logical where \code{TRUE} is
interpreted as an event and \code{FALSE} as right-censored observation

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap53}\raggedright\small
\NWtarget{nuweb50}{} $\langle\,${\itshape variable names and checks}\nobreak\ {\footnotesize {50}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@cl <- match.call()@\\
\mbox{}\verb@if (is.null(varnames))@\\
\mbox{}\verb@    varnames <- c(deparse1(substitute(y)), @\\
\mbox{}\verb@                  deparse1(substitute(groups)), @\\
\mbox{}\verb@                  deparse1(substitute(blocks)))@\\
\mbox{}\verb@@\\
\mbox{}\verb@DNAME <- paste(varnames[1], "by", varnames[2])@\\
\mbox{}\verb@groups <- factor(groups)@\\
\mbox{}\verb@if (nlevels(groups) < 2L)@\\
\mbox{}\verb@    stop(gettextf("incorrect argument 'groups' in %s: at least two groups needed",@\\
\mbox{}\verb@                  "free1way"),@\\
\mbox{}\verb@         domain = NA)@\\
\mbox{}\verb@DNAME <- paste(DNAME, paste0("(", paste0(levels(groups), collapse = ", "), @\\
\mbox{}\verb@                             ")"))@\\
\mbox{}\verb@@\\
\mbox{}\verb@if (!is.null(blocks)) {@\\
\mbox{}\verb@    if (length(unique(blocks)) < 2L) {@\\
\mbox{}\verb@        blocks <- NULL@\\
\mbox{}\verb@    } else {@\\
\mbox{}\verb@        blocks <- factor(blocks)@\\
\mbox{}\verb@        DNAME <- paste(DNAME, "\n\t stratified by", varnames[3])@\\
\mbox{}\verb@    }@\\
\mbox{}\verb@}@\\
\mbox{}\verb@varnames <- varnames[varnames != "NULL"]@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb51}{51}\NWlink{nuweb52}{, 52}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
Note that the return value of \code{unique} might differ between platforms.
Because users can decide about the unique values in the vector \code{y} (by using
\code{round} or \code{trunc}, for example), before calling this function, we
refrain from handling this issue internally. However, we offer an
\code{nbins} argument for binning response observations at sample quantiles 
in the absence of right-censoring. Note that we ignore the blocks when
calling \code{cut}. This is inefficient for many blocks with non-overlapping
support of the outcome distribtion, as large sparse tables are resulting. We
remove the corresponding elements from the first dimension of such a table
later on (in \code{.free1wayML}). The reason for this inconvenice is that
all the data going into \code{.free1wayML} can be stored as a \code{table} (and
not as a list of things).

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap54}\raggedright\small
\NWtarget{nuweb51}{} $\langle\,${\itshape free1way numeric}\nobreak\ {\footnotesize {51}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@free1way.numeric <- function(y, groups, blocks = NULL, event = NULL, @\\
\mbox{}\verb@                             weights = NULL, nbins = 0, varnames = NULL, ...) @\\
\mbox{}\verb@{@\\
\mbox{}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape variable names and checks}\nobreak\ {\footnotesize \NWlink{nuweb50}{50}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\\
\mbox{}\verb@    if (!is.null(event)) {@\\
\mbox{}\verb@        if (!is.logical(event))@\\
\mbox{}\verb@            stop(gettextf("%s currently only allows independent right-censoring",@\\
\mbox{}\verb@                          "free1way"),@\\
\mbox{}\verb@                 domain = NA)@\\
\mbox{}\verb@        uy <- sort(unique(y[event]))@\\
\mbox{}\verb@    } else {@\\
\mbox{}\verb@        uy <- sort(unique(y))@\\
\mbox{}\verb@    }@\\
\mbox{}\verb@    if (nbins && nbins < length(uy) && is.null(event)) {@\\
\mbox{}\verb@        nbins <- ceiling(nbins)@\\
\mbox{}\verb@        breaks <- c(-Inf, quantile(y, probs = seq_len(nbins) / (nbins + 1L)), @\\
\mbox{}\verb@                     Inf)@\\
\mbox{}\verb@    } else {@\\
\mbox{}\verb@        breaks <- c(-Inf, uy, Inf)@\\
\mbox{}\verb@    }@\\
\mbox{}\verb@    r <- ordered(cut(y, breaks = breaks, ordered_result = TRUE, @\\
\mbox{}\verb@                     labels = FALSE)) ### avoids costly formatC call@\\
\mbox{}\verb@    RVAL <- free1way(y = r, groups = groups, blocks = blocks, @\\
\mbox{}\verb@                     event = event, weights = weights, @\\
\mbox{}\verb@                     varnames = varnames, ...)@\\
\mbox{}\verb@    RVAL$data.name <- DNAME@\\
\mbox{}\verb@    RVAL$call <- cl@\\
\mbox{}\verb@    RVAL@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb25b}{25b}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
The \code{factor} method also allows right-censoring but otherwise is just a
call to \code{xtabs}:

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap55}\raggedright\small
\NWtarget{nuweb52}{} $\langle\,${\itshape free1way factor}\nobreak\ {\footnotesize {52}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@free1way.factor <- function(y, groups, blocks = NULL, event = NULL, @\\
\mbox{}\verb@                            weights = NULL, varnames = NULL, ...) @\\
\mbox{}\verb@{@\\
\mbox{}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape variable names and checks}\nobreak\ {\footnotesize \NWlink{nuweb50}{50}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\\
\mbox{}\verb@    if (nlevels(y) > 2L && !is.ordered(y))@\\
\mbox{}\verb@        stop(gettextf("%s is not defined for unordered responses",@\\
\mbox{}\verb@                              "free1way"),@\\
\mbox{}\verb@             domain = NA)@\\
\mbox{}\verb@    d <- data.frame(w = 1, y = y, groups = groups)@\\
\mbox{}\verb@    if (!is.null(weights)) d$w <- weights@\\
\mbox{}\verb@    if (is.null(blocks)) blocks <- gl(1, nrow(d))@\\
\mbox{}\verb@    d$blocks <- blocks @\\
\mbox{}\verb@    if (!is.null(event)) {@\\
\mbox{}\verb@       if (!is.logical(event))@\\
\mbox{}\verb@            stop(gettextf("%s currently only allows independent right-censoring",@\\
\mbox{}\verb@                          "free1way"),@\\
\mbox{}\verb@                domain = NA)@\\
\mbox{}\verb@        d$event <- factor(event, levels = c(FALSE, TRUE), @\\
\mbox{}\verb@                          labels = c("FALSE", "TRUE"))@\\
\mbox{}\verb@    }@\\
\mbox{}\verb@    tab <- xtabs(w ~ ., data = d)@\\
\mbox{}\verb@    dn <- dimnames(tab)@\\
\mbox{}\verb@    names(dn)[seq_along(varnames)] <- varnames@\\
\mbox{}\verb@    dimnames(tab) <- dn@\\
\mbox{}\verb@    RVAL <- free1way(tab, ...)@\\
\mbox{}\verb@    RVAL$data.name <- DNAME@\\
\mbox{}\verb@    RVAL$call <- cl@\\
\mbox{}\verb@    RVAL@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb25b}{25b}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
\section{\code{free1way} Methods}

We start with \code{coef}, \code{vcov}, and
\code{model.frame}/\code{model.matrix} methods such that multiple comparison
procedures from \pkg{multcomp} will work out of the box. The \code{coef}
method allows to obtain effects at alternative scales: probabilistic indices
(\code{AUC} = \code{PI}) or the overlap coefficient:

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap56}\raggedright\small
\NWtarget{nuweb53}{} $\langle\,${\itshape free1way methods}\nobreak\ {\footnotesize {53}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@coef.free1way <- function(object, what = c("shift", "PI", "AUC", "OVL"), ...)@\\
\mbox{}\verb@{@\\
\mbox{}\verb@    what <- match.arg(what)@\\
\mbox{}\verb@    cf <- object$coefficients@\\
\mbox{}\verb@    return(switch(what, "shift" = cf,@\\
\mbox{}\verb@                        "PI" = object$link$parm2PI(cf),@\\
\mbox{}\verb@                        "AUC" = object$link$parm2PI(cf),        ### same as PI@\\
\mbox{}\verb@                        "OVL" = object$link$parm2OVL(cf)))@\\
\mbox{}\verb@}@\\
\mbox{}\verb@vcov.free1way <- function(object, ...)@\\
\mbox{}\verb@    object$vcov@\\
\mbox{}\verb@logLik.free1way <- function(object, ...)@\\
\mbox{}\verb@    -object$value@\\
\mbox{}\verb@model.frame.free1way <- function(formula, ...) {@\\
\mbox{}\verb@    if (!is.null(formula[["data"]])) return(formula[["data"]])@\\
\mbox{}\verb@    ret <- as.data.frame(formula$table)@\\
\mbox{}\verb@    ret <- ret[rep(seq_len(nrow(ret)), ret$Freq),,drop = FALSE]@\\
\mbox{}\verb@    ret@\\
\mbox{}\verb@}@\\
\mbox{}\verb@### the next two might go into multcomp@\\
\mbox{}\verb@terms.free1way <- function(x, ...) @\\
\mbox{}\verb@{@\\
\mbox{}\verb@    mf <- model.frame(x)@\\
\mbox{}\verb@    terms(as.formula(paste(names(mf)[1:2], collapse = "~")), @\\
\mbox{}\verb@          data = mf)@\\
\mbox{}\verb@}@\\
\mbox{}\verb@model.matrix.free1way <- function (object, ...) @\\
\mbox{}\verb@{@\\
\mbox{}\verb@    mf <- model.frame(object)@\\
\mbox{}\verb@    tm <- terms(object)@\\
\mbox{}\verb@    mm <- model.matrix(delete.response(tm), data = mf)@\\
\mbox{}\verb@    at <- attributes(mm)@\\
\mbox{}\verb@    mm <- mm[, -1]@\\
\mbox{}\verb@    at$dim[2] <- at$dim[2] - 1@\\
\mbox{}\verb@    at$dimnames[[2]] <- at$dimnames[[2]][-1]@\\
\mbox{}\verb@    at$assign <- at$assign[-1]@\\
\mbox{}\verb@    attributes(mm) <- at@\\
\mbox{}\verb@    mm@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb25b}{25b}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
We use the \code{print} method to report different test statistics and
corresponding $p$-values via the \code{test} and \code{alternative}
arguments. The reason for doing so is that the parameter estimation only
needs to be performed once in cases users are interested in different
tests or (see below) confidence intervals. By default, an asymptotic  
permutation test is performed, mainly because the $p$-values coincide with
some special cases (\code{wilcox,kruskal,friedman.test}):

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap57}\raggedright\small
\NWtarget{nuweb54}{} $\langle\,${\itshape free1way print}\nobreak\ {\footnotesize {54}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@.print.free1way <- function(x, test = c("Permutation", "Wald", "LRT", "Rao"), @\\
\mbox{}\verb@                            alternative = c("two.sided", "less", "greater"), @\\
\mbox{}\verb@                            tol = sqrt(.Machine$double.eps), @\\
\mbox{}\verb@                            mu = 0, ### allow permutation testing non-null hypotheses@\\
\mbox{}\verb@                                    ### in alignment with confint(free1way(, B > 0))@\\
\mbox{}\verb@                            ...)@\\
\mbox{}\verb@{@\\
\mbox{}\verb@@\\
\mbox{}\verb@    test <- match.arg(test)@\\
\mbox{}\verb@    alternative <- match.arg(alternative)@\\
\mbox{}\verb@@\\
\mbox{}\verb@    ### global@\\
\mbox{}\verb@    cf <- coef(x)@\\
\mbox{}\verb@    if ((length(cf) > 1L || test == "LRT") && alternative != "two.sided") @\\
\mbox{}\verb@        stop(gettextf("cannot compute one-sided p-values in %s",@\\
\mbox{}\verb@                      "free1way"),@\\
\mbox{}\verb@             domain = NA)@\\
\mbox{}\verb@@\\
\mbox{}\verb@    DF <- NULL@\\
\mbox{}\verb@    parm <- seq_along(cf)@\\
\mbox{}\verb@    value <- mu@\\
\mbox{}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape statistics}\nobreak\ {\footnotesize \NWlink{nuweb36a}{36a}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\\
\mbox{}\verb@    if (test == "Permutation") {@\\
\mbox{}\verb@        @\hbox{$\langle\,${\itshape Permutation p-values}\nobreak\ {\footnotesize \NWlink{nuweb39}{39}}$\,\rangle$}\verb@@\\
\mbox{}\verb@    }@\\
\mbox{}\verb@@\\
\mbox{}\verb@    RVAL <- list(statistic = STATISTIC, parameter = DF, p.value = PVAL, @\\
\mbox{}\verb@        null.value = x$mu, alternative = alternative, method = x$method, @\\
\mbox{}\verb@        data.name = x$data.name)@\\
\mbox{}\verb@    class(RVAL) <- "htest"@\\
\mbox{}\verb@    return(RVAL)@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@\\
\mbox{}\verb@print.free1way <- function(x, ...) @\\
\mbox{}\verb@{@\\
\mbox{}\verb@    print(ret <- .print.free1way(x, ...))@\\
\mbox{}\verb@    return(invisible(x))@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb25b}{25b}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
The \code{summary} method performs population Wald inference unless the
\code{test} argument is specified:

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap58}\raggedright\small
\NWtarget{nuweb55}{} $\langle\,${\itshape free1way summary}\nobreak\ {\footnotesize {55}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@summary.free1way <- function(object, test, @\\
\mbox{}\verb@                             alternative = c("two.sided", "less", "greater"), @\\
\mbox{}\verb@                             tol = .Machine$double.eps, ...)@\\
\mbox{}\verb@{@\\
\mbox{}\verb@@\\
\mbox{}\verb@    if (!missing(test))@\\
\mbox{}\verb@        return(.print.free1way(object, test = test, @\\
\mbox{}\verb@                               alternative = alternative, tol = tol, ...))@\\
\mbox{}\verb@   @\\
\mbox{}\verb@    alternative <- match.arg(alternative)@\\
\mbox{}\verb@@\\
\mbox{}\verb@    ESTIMATE <- coef(object)@\\
\mbox{}\verb@    SE <- sqrt(diag(vcov(object)))@\\
\mbox{}\verb@    STATISTIC <- unname(ESTIMATE / SE)@\\
\mbox{}\verb@    if (alternative == "less") {@\\
\mbox{}\verb@        PVAL <- pnorm(STATISTIC)@\\
\mbox{}\verb@    } else if (alternative == "greater") {@\\
\mbox{}\verb@        PVAL <- pnorm(STATISTIC, lower.tail = FALSE)@\\
\mbox{}\verb@    } else {@\\
\mbox{}\verb@        PVAL <- 2 * pnorm(-abs(STATISTIC))@\\
\mbox{}\verb@    }@\\
\mbox{}\verb@    cfmat <- cbind(ESTIMATE, SE, STATISTIC, PVAL)@\\
\mbox{}\verb@    colnames(cfmat) <- c(object$link$parm, "Std. Error", "z value",@\\
\mbox{}\verb@                         switch(alternative, "two.sided" = "P(>|z|)",@\\
\mbox{}\verb@                                             "less" = "P(<z)",@\\
\mbox{}\verb@                                             "greater" = "P(>z)"))@\\
\mbox{}\verb@    ret <- list(call = object$call, coefficients = cfmat)@\\
\mbox{}\verb@    class(ret) <- "summary.free1way"@\\
\mbox{}\verb@    return(ret)@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@\\
\mbox{}\verb@print.summary.free1way <- function(x, ...) @\\
\mbox{}\verb@{@\\
\mbox{}\verb@    cat("\nCall:\n", paste(deparse(x$call), sep = "\n", collapse = "\n"), @\\
\mbox{}\verb@        "\n\n", sep = "")@\\
\mbox{}\verb@    cat("Coefficients:\n")@\\
\mbox{}\verb@    printCoefmat(x$coefficients)@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb25b}{25b}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
Confidence intervals are computed by inversion of the corresponding test
statistics. Because LRT and Rao confidence intervals are invariant with
respect to transformations, proper LRT or Rao confidence intervals for probabilistic
indices or overlap coefficients can also be computed. 

We begin computing the critical values for permutation tests, making sure
the confidence intervals will be in line with one- and two-sided $p$-values:
\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap59}\raggedright\small
\NWtarget{nuweb56}{} $\langle\,${\itshape permutation confint}\nobreak\ {\footnotesize {56}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@if (length(cf) > 1L)@\\
\mbox{}\verb@    stop(gettextf("permutation confidence intervals only available for 2-sample comparisons in %s",@\\
\mbox{}\verb@                  "confint.free1way"),@\\
\mbox{}\verb@         domain = NA)@\\
\mbox{}\verb@if (!is.null(object$exact)) {@\\
\mbox{}\verb@    qu <- c(object$exact$qle(1 - conf.level),@\\
\mbox{}\verb@            object$exact$qgr(1 - conf.level))@\\
\mbox{}\verb@} else {@\\
\mbox{}\verb@    if (is.null(object$perm$permStat)) {@\\
\mbox{}\verb@        qu <- qnorm(conf.level) * c(-1, 1)@\\
\mbox{}\verb@    } else {@\\
\mbox{}\verb@        .pq <- function(s, alpha) @\\
\mbox{}\verb@        {@\\
\mbox{}\verb@            su <- sort(unique(s)) @\\
\mbox{}\verb@            ### F = P(T <= t), S = P(T >= t)@\\
\mbox{}\verb@            Fs <- cumsum(st <- table(match(s, su)))@\\
\mbox{}\verb@            Ss <- length(s) - Fs + st@\\
\mbox{}\verb@            c(max(su[Fs <= alpha * length(s)]),@\\
\mbox{}\verb@              min(su[Ss <= alpha * length(s)]))@\\
\mbox{}\verb@        }@\\
\mbox{}\verb@        ### cf PVAL computation!!!@\\
\mbox{}\verb@        rs <- object$perm$permStat@\\
\mbox{}\verb@        qu <- .pq(round(rs, 10), alpha = 1 - conf.level)@\\
\mbox{}\verb@        att.level <- mean(rs > qu[1] & rs < qu[2])@\\
\mbox{}\verb@        attr(CINT, "Attained level") <- att.level@\\
\mbox{}\verb@    }@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb59}{59}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
The \code{confint} method starts with Wald intervals, which are either returned or used as starting
values for the inversion. We start with the lower bound. Sometimes (for
example in case of complete separation), the information is very low so the
Wald intervals are extremely wide and the profile log-likelihood cannot be
computed. So we try to make the starting interval wider in a step-wise
manner. However, this may still fail and we thus exit gently, returning
\code{NA} and issuing a warning.

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap60}\raggedright\small
\NWtarget{nuweb57}{} $\langle\,${\itshape confint lower}\nobreak\ {\footnotesize {57}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@CINT[p,1] <- max(CINT[p, 1], cf[p] - 1)@\\
\mbox{}\verb@sdlwr <- sign(sfun(cf[p], parm = p, quantile = qu[2]))@\\
\mbox{}\verb@slwr <- try(sfun(CINT[p,1], parm = p, quantile = qu[2]))@\\
\mbox{}\verb@k <- 1@\\
\mbox{}\verb@if (inherits(slwr, "try-error")) {@\\
\mbox{}\verb@    CINT[p,1] <- NA@\\
\mbox{}\verb@} else {@\\
\mbox{}\verb@    while ((is.na(slwr) || @\\
\mbox{}\verb@            sign(slwr) == sdlwr) && k < 30) {@\\
\mbox{}\verb@        CINT[p,1] <- CINT[p,1] - 1@\\
\mbox{}\verb@        slwr <- try(sfun(CINT[p,1], parm = p, quantile = qu[2]))@\\
\mbox{}\verb@        if (inherits(slwr, "try-error")) {@\\
\mbox{}\verb@            CINT[p,1] <- NA@\\
\mbox{}\verb@            break()@\\
\mbox{}\verb@        }@\\
\mbox{}\verb@        k <- k + 1@\\
\mbox{}\verb@    }@\\
\mbox{}\verb@}@\\
\mbox{}\verb@if (k == 30) {@\\
\mbox{}\verb@    CINT[p,1] <- NA@\\
\mbox{}\verb@} else {@\\
\mbox{}\verb@    lwr <- try(uniroot(sfun, interval = c(CINT[p,1], cf[p]), @\\
\mbox{}\verb@                       parm = p, quantile = qu[2])$root)@\\
\mbox{}\verb@    if (inherits(lwr, "try-error")) {@\\
\mbox{}\verb@        CINT[p,1] <- NA@\\
\mbox{}\verb@    } else {@\\
\mbox{}\verb@        CINT[p,1] <- lwr@\\
\mbox{}\verb@    }@\\
\mbox{}\verb@}@\\
\mbox{}\verb@if (is.na(CINT[p,1]))@\\
\mbox{}\verb@    warning(gettextf("failed to compute confidence interval in %s",@\\
\mbox{}\verb@                     "confint.free1way"),@\\
\mbox{}\verb@            domain = NA)@\\
\mbox{}\verb@@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb59}{59}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
The upper bound works in the very same way.

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap61}\raggedright\small
\NWtarget{nuweb58}{} $\langle\,${\itshape confint upper}\nobreak\ {\footnotesize {58}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@CINT[p,2] <- min(CINT[p, 2], cf[p] + 1)@\\
\mbox{}\verb@sdupr <- sign(sfun(cf[p], parm = p, quantile = qu[1]))@\\
\mbox{}\verb@supr <- try(sfun(CINT[p,2], parm = p, quantile = qu[1]))@\\
\mbox{}\verb@k <- 1@\\
\mbox{}\verb@if (inherits(supr, "try-error")) {@\\
\mbox{}\verb@    CINT[p,2] <- NA@\\
\mbox{}\verb@} else {@\\
\mbox{}\verb@    while ((is.na(supr) || @\\
\mbox{}\verb@            sign(supr) == sdupr) && k < 30) {@\\
\mbox{}\verb@        CINT[p,2] <- CINT[p,2] + 1@\\
\mbox{}\verb@        supr <- try(sfun(CINT[p,2], parm = p, quantile = qu[1]))@\\
\mbox{}\verb@        if (inherits(supr, "try-error")) {@\\
\mbox{}\verb@            CINT[p,2] <- NA@\\
\mbox{}\verb@            break()@\\
\mbox{}\verb@        }@\\
\mbox{}\verb@        k <- k + 1@\\
\mbox{}\verb@    }@\\
\mbox{}\verb@}@\\
\mbox{}\verb@if (k == 30) {@\\
\mbox{}\verb@    CINT[p,2] <- NA @\\
\mbox{}\verb@} else {@\\
\mbox{}\verb@    upr <- try(uniroot(sfun, interval = c(cf[p], CINT[p, 2]), @\\
\mbox{}\verb@                       parm = p, quantile = qu[1])$root)@\\
\mbox{}\verb@    if (inherits(upr, "try-error")) {@\\
\mbox{}\verb@        CINT[p, 2] <- NA@\\
\mbox{}\verb@    } else {@\\
\mbox{}\verb@        CINT[p, 2] <- upr@\\
\mbox{}\verb@    }@\\
\mbox{}\verb@}@\\
\mbox{}\verb@if (is.na(CINT[p,2]))@\\
\mbox{}\verb@    warning(gettextf("failed to compute confidence interval in %s",@\\
\mbox{}\verb@                     "confint.free1way"),@\\
\mbox{}\verb@            domain = NA)@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb59}{59}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap62}\raggedright\small
\NWtarget{nuweb59}{} $\langle\,${\itshape free1way confint}\nobreak\ {\footnotesize {59}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@confint.free1way <- function(object, parm,@\\
\mbox{}\verb@    level = .95, test = c("Permutation", "Wald", "LRT", "Rao"), @\\
\mbox{}\verb@    what = c("shift", "PI", "AUC", "OVL"), ...)@\\
\mbox{}\verb@{@\\
\mbox{}\verb@@\\
\mbox{}\verb@    test <- match.arg(test)@\\
\mbox{}\verb@    conf.level <- 1 - (1 - level) / 2@\\
\mbox{}\verb@@\\
\mbox{}\verb@    cf <- coef(object)@\\
\mbox{}\verb@    if (missing(parm)) @\\
\mbox{}\verb@        parm <- seq_along(cf)@\\
\mbox{}\verb@@\\
\mbox{}\verb@    CINT <- confint.default(object, level = level)@\\
\mbox{}\verb@    if (test != "Wald") {@\\
\mbox{}\verb@        wlevel <- level@\\
\mbox{}\verb@        wlevel <- 1 - (1 - level) / 2@\\
\mbox{}\verb@        CINT[] <- confint.default(object, level = wlevel)@\\
\mbox{}\verb@@\\
\mbox{}\verb@        sfun <- function(value, parm, quantile) @\\
\mbox{}\verb@        {@\\
\mbox{}\verb@            x <- object@\\
\mbox{}\verb@            alternative <- "two.sided"@\\
\mbox{}\verb@            tol <- .Machine$double.eps@\\
\mbox{}\verb@            @\hbox{$\langle\,${\itshape statistics}\nobreak\ {\footnotesize \NWlink{nuweb36a}{36a}}$\,\rangle$}\verb@@\\
\mbox{}\verb@            ### we also could invert p-values, but the@\\
\mbox{}\verb@            ### p-value function might be discrete for permutation@\\
\mbox{}\verb@            ### tests, in contrast to the test statistic@\\
\mbox{}\verb@            return(STATISTIC - quantile)@\\
\mbox{}\verb@        }@\\
\mbox{}\verb@@\\
\mbox{}\verb@        if (test == "Permutation") {@\\
\mbox{}\verb@            @\hbox{$\langle\,${\itshape permutation confint}\nobreak\ {\footnotesize \NWlink{nuweb56}{56}}$\,\rangle$}\verb@@\\
\mbox{}\verb@        } else {@\\
\mbox{}\verb@            qu <- rep.int(qchisq(level, df = 1), 2) ### always two.sided@\\
\mbox{}\verb@        }@\\
\mbox{}\verb@@\\
\mbox{}\verb@        for (p in parm) {@\\
\mbox{}\verb@            @\hbox{$\langle\,${\itshape confint lower}\nobreak\ {\footnotesize \NWlink{nuweb57}{57}}$\,\rangle$}\verb@@\\
\mbox{}\verb@            @\hbox{$\langle\,${\itshape confint upper}\nobreak\ {\footnotesize \NWlink{nuweb58}{58}}$\,\rangle$}\verb@@\\
\mbox{}\verb@        }@\\
\mbox{}\verb@    }@\\
\mbox{}\verb@@\\
\mbox{}\verb@    what <- match.arg(what)@\\
\mbox{}\verb@    CINT <- switch(what, "shift" = CINT,@\\
\mbox{}\verb@                         "PI" = object$link$parm2PI(CINT),@\\
\mbox{}\verb@                         "AUC" = object$link$parm2PI(CINT), ### same as PI @\\
\mbox{}\verb@                         "OVL" = object$link$parm2OVL(CINT))@\\
\mbox{}\verb@    return(CINT)@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb25b}{25b}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
As an example, we compute log-odds ratios for the table introduced above and
report some tests and confidence intervals:

<<free>>=
x
### asymptotic permutation test
(ft <- free1way(x))
coef(ft)
vcov(ft)
### Wald per parameter
summary(ft)
library("multcomp")
summary(glht(ft), test = univariate())

### global Wald
summary(ft, test = "Wald")
summary(glht(ft), test = Chisqtest())

### Rao score, Permutation score, LRT
summary(ft, test = "Rao")
summary(ft, test = "Permutation")
summary(ft, test = "LRT")

### Wald confidence intervals, unadjusted
confint(glht(ft), calpha = univariate_calpha())
confint(ft, test = "Wald")

### Rao and LRT intervals
confint(ft, test = "Rao")
confint(ft, test = "LRT")
@

\chapter{Special Test Procedures}

We now demonstrate that \code{free1way} produces the exact same results as
some of the classical test procedures implemented in the \pkg{stats}
package, and how the new implementation extends the existing functionality.

\section{Wilcoxon Test}

The first example is a Wilcoxon test for a single log-odds ratio
comparing to treatment groups. The Wilcoxon test is the score test in a
$2$-sample proportional odds model

<<wilcox-formula>>=
N <- 25
w <- gl(2, N)
y <- rlogis(length(w), location = c(0, 1)[w])

#### link = logit is default
ft <- free1way(y ~ w)

### Wald 
summary(ft)

### Permutation test
wilcox.test(y ~ w, alternative = "greater", correct = FALSE, exact = FALSE)$p.value
pvalue(wilcox_test(y ~ w, alternative = "greater"))
summary(ft, test = "Permutation", alternative = "less")$p.value
wilcox.test(y ~ w, alternative = "less", correct = FALSE, exact = FALSE)$p.value
pvalue(wilcox_test(y ~ w, alternative = "less"))
summary(ft, test = "Permutation", alternative = "greater")$p.value
wilcox.test(y ~ w, correct = FALSE, exact = FALSE)$p.value
kruskal.test(y ~ w)$p.value
pvalue(wilcox_test(y ~ w))
summary(ft, test = "Permutation")$p.value

### Wald tests
summary(ft, test = "Wald", alternative = "less")
summary(ft, test = "Wald", alternative = "greater")
summary(ft, test = "Wald")

### Rao score tests
summary(ft, test = "Rao", alternative = "less")
summary(ft, test = "Rao", alternative = "greater")
summary(ft, test = "Rao")

### LRT (only two-sided)
summary(ft, test = "LRT")

### confidence intervals for log-odds ratios
confint(ft, test = "Permutation")
confint(ft, test = "LRT")
confint(ft, test = "Wald")
confint(ft, test = "Rao")

### confidence interval for "Wilcoxon Parameter" = PI = AUC
confint(ft, test = "Rao", what = "AUC")

### comparison with rms::orm
library("rms")
rev(coef(or <- orm(y ~ w)))[1]
coef(ft)
logLik(or)
logLik(ft)
vcov(or)[2,2]
vcov(ft)
ci <- confint(or)
ci[nrow(ci),]
confint(ft, test = "Wald")
@


\section{Mantel-Haenszel Test}

The Cochran-Mantel-Haenszel test for conditional independence in $2 \times 2$
tables also relies on a proportional odds model.

<<mh>>=
example(mantelhaen.test, echo = FALSE)
mantelhaen.test(UCBAdmissions, correct = FALSE)
ft <- free1way(UCBAdmissions)
summary(ft, test = "Wald")
exp(coef(ft))
exp(confint(ft, test = "Wald"))
exp(sapply(dimnames(UCBAdmissions)[[3L]], function(dept)
       confint(free1way(UCBAdmissions[,,dept]), test = "Permutation")))
sapply(dimnames(UCBAdmissions)[[3L]], function(dept)
       fisher.test(UCBAdmissions[,,dept], conf.int = TRUE)$conf.int)
@

\section{\code{prop.test}}

For a single $2 \times 2$ table, all tests are nonparametric (as the model
is saturated) and therefore also inference procedures result in the same
$p$-values, for example.

<<pt>>=
prop.test(UCBAdmissions[,,1], correct = FALSE)
summary(free1way(UCBAdmissions[,,1]), test = "Rao")
@


\section{Kruskal-Wallis Test}

The Kruskal-Wallis test is the score test in a $K$-sample proportional odds
model

<<kw>>=
example(kruskal.test, echo = FALSE)
kruskal.test(x ~ g)
free1way(x ~ g)
@

\section{Savage Test}

The Savage test assumes proportional odds and, consequently, is the score
test in a proportional odds model. We start without censoring (Savage test) and add strata

<<sw>>=
library("survival")
N <- 10
nd <- expand.grid(g = gl(3, N), s = gl(3, N))
nd$tm <- rexp(nrow(nd))
nd$ev <- TRUE
cm <- coxph(Surv(tm, ev) ~ g + strata(s), data = nd)

(ft <- free1way(tm ~ g | s, data = nd, link = "cloglog"))
coef(cm)
coef(ft)
vcov(cm)
vcov(ft)
### Rao score tests
summary(cm)$sctest
summary(ft, test = "Rao")
### likelihood ratio tests
summary(cm)$logtest
summary(ft, test = "LRT")
### Wald tests
summary(cm)$waldtest
summary(ft, test = "Wald")
### asymptotic permutation tests
survdiff(Surv(tm, ev) ~ g + strata(s), data = nd, rho = 0)[c("chisq", "pvalue")]
summary(ft, test = "Permutation")
library("coin")
independence_test(Surv(tm, ev) ~ g | s, data = nd, ytrafo = function(...)
                  trafo(..., numeric_trafo = logrank_trafo, block = nd$s), 
                  teststat = "quad")
@

Wilcoxon against proportional odds

<<Peto>>=
survdiff(Surv(tm, ev) ~ g + strata(s), data = nd, rho = 1)[c("chisq", "pvalue")]
(ft <- free1way(tm ~ g | s, data = nd, link = "logit"))
summary(ft)
summary(ft, test = "Rao")
summary(ft, test = "LRT")
summary(ft, test = "Wald")
summary(ft, test = "Permutation")
@

\section{Log-rank Test}

And now with censoring. We cannot expect this to be identical with what
\pkg{survival} reports, as this package is based on the partial likelihood
and we operate on the full likelihood. 

<<sw>>=
library("survival")
data("GBSG2", package = "TH.data")
cm <- coxph(Surv(time, cens) ~ horTh + strata(tgrade), data = GBSG2)

ft <- with(GBSG2, free1way(Surv(time, cens) ~ horTh | tgrade, 
                           link = "cloglog"))
coef(cm)
coef(ft)
vcov(cm)
vcov(ft)
### Rao score tests
summary(cm)$sctest
summary(ft, test = "Rao")
### likelihood ratio tests
summary(cm)$logtest
summary(ft, test = "LRT")
### Wald tests
summary(cm)$waldtest
summary(ft, test = "Wald")
### asymptotic permutation tests
survdiff(Surv(time, cens) ~ horTh + strata(tgrade), data = GBSG2, rho = 0)[c("chisq", "pvalue")]
summary(ft, test = "Permutation")
independence_test(Surv(time, cens) ~ horTh | tgrade, data = GBSG2, ytrafo = function(...)
                  trafo(..., numeric_trafo = logrank_trafo, block = GBSG2$tgrade), 
                  teststat = "quad")
@

Wilcoxon against proportional odds

<<Peto>>=
survdiff(Surv(time, cens) ~ horTh + strata(tgrade), data = GBSG2, rho = 1)[c("chisq", "pvalue")]
(ft <- with(GBSG2, free1way(Surv(time, cens) ~ horTh | tgrade, 
                            link = "logit")))
summary(ft, test = "Rao")
summary(ft, test = "LRT")
summary(ft, test = "Wald")
summary(ft, test = "Permutation")
@

And now with more and smaller blocks

<<sw>>=
library("survival")
GBSG2$str <- cut(GBSG2$tsize, breaks = c(0, 1:9 * 10, Inf))
cm <- coxph(Surv(time, cens) ~ horTh + strata(str), data = GBSG2)

ft <- with(GBSG2, free1way(Surv(time, cens) ~ horTh | str, 
                           link = "cloglog"))
coef(cm)
coef(ft)
vcov(cm)
vcov(ft)
### Rao score tests
summary(cm)$sctest
summary(ft, test = "Rao")
### likelihood ratio tests
summary(cm)$logtest
summary(ft, test = "LRT")
### Wald tests
summary(cm)$waldtest
summary(ft, test = "Wald")
### asymptotic permutation tests
survdiff(Surv(time, cens) ~ horTh + strata(str), data = GBSG2, rho = 0)[c("chisq", "pvalue")]
summary(ft, test = "Permutation")
@

Wilcoxon against proportional odds

<<Peto>>=
survdiff(Surv(time, cens) ~ horTh + strata(str), data = GBSG2, rho = 1)[c("chisq", "pvalue")]
(ft <- with(GBSG2, free1way(Surv(time, cens) ~ horTh | str, 
                            link = "logit")))
summary(ft, test = "Rao")
summary(ft, test = "LRT")
summary(ft, test = "Wald")
summary(ft, test = "Permutation")
@


\section{van der Waerden Test}

Normal scores test against a generalised Cohen's $d$:

<<normal>>=
nd$y <- rnorm(nrow(nd))
free1way(y ~ g | s, data = nd, link = "probit")
independence_test(y ~ g | s, data = nd, ytrafo = function(...)
                  trafo(..., numeric_trafo = normal_trafo, block = nd$s), 
                  teststat = "quad")
@

\section{Friedman Test}

Each observation is a block in a $K$-sample proportional odds model

\begin{figure}
<<friedman-data, fig = TRUE>>=
example(friedman.test, echo = FALSE)

### Myles Hollander & Wolfe (2014, Example 7.1, page 294)
boxplot(RoundingTimes, xlab = "Method", ylab = "Rounding-First-Base Time", 
        las = 1)
matplot(t(RoundingTimes), add = TRUE, type = "l", 
        lty = 1, lwd = 2, col = rgb(.1, .1, .1, .1))

me <- colnames(RoundingTimes)
d <- expand.grid(me = factor(me, labels = me, levels = me),
                 id = factor(seq_len(nrow(RoundingTimes))))
d$time <- c(t(RoundingTimes))
@
\caption{Rounding-first-base time data.}
\end{figure}

<<friedman>>=
friedman.test(RoundingTimes)
(ft <- free1way(time ~ me | id, data = d))
@

<<friedman-add>>=
summary(ft)
library("multcomp")
glht(ft, linfct = mcp(me = "Tukey"))
@

<<friedman-link>>=
logLik(ft)
logLik(free1way(time ~ me | id, data = d, link = "probit"))
logLik(free1way(time ~ me | id, data = d, link = "cloglog"))
logLik(free1way(time ~ me | id, data = d, link = "loglog"))
@

Maybe proportional-hazards model better?

\section{McNemar Test}


<<McNemar>>=
example(mcnemar.test, echo = FALSE)
# set-up data frame with survey outcomes for voters
s <- gl(2, 1, labels = dimnames(Performance)[[1L]])
survey <- gl(2, 1, labels = c("1st", "2nd"))
nvoters <- c(Performance)
x <- expand.grid(survey = survey, voter = factor(seq_len(sum(nvoters))))
x$performance <- c(rep(s[c(1, 1)], nvoters[1]), rep(s[c(2, 1)], nvoters[2]),
                   rep(s[c(1, 2)], nvoters[3]), rep(s[c(2, 2)], nvoters[4]))
# note that only those voters changing their minds are relevant
mcn <- free1way(xtabs(~ performance + survey + voter, data = x))
# same result as mcnemar.test w/o continuity correction
print(mcn)
# X^2 statistic
summary(mcn, test = "Permutation")$statistic^2
mcnemar.test(Performance, correct = FALSE)
# Wald inference
summary(mcn)
confint(mcn, test = "Wald")
### because the model is saturated, the link function doesn't affect the
### p-value (but the coefficients are of course different)
free1way(xtabs(~ performance + survey + voter, data = x), link = "probit")
@


\section{Incomplete Block Designs}

\code{friedman.test} expects all blocks to be complete and
\code{kruskal.test} has no idea about blocks. When blocks are incomplete,
\code{free1way} can be employed. Replacing the normality assumption inherit
in \code{aov} \citep[Chapter~8.3.1. in][]{Meier2022} with a semiparametric
proportional odds model, we get
<<incomplete>>=
data("taste", package = "daewr")
### highly discrete
table(taste$score)
summary(free1way(score ~ recipe | panelist, data = taste))
@


\section{Contrast Tests}

\code{free1way} output can be used to define multiple contrast tests and
corresponding confidence intervals via the \pkg{multcomp} package. For
example, Tukey-style simultaneous all-pair comparisons can be implemented via
<<Tukey>>=
tk <- free1way(Ozone ~ Month, data = airquality)
library("multcomp")
confint(glht(tk, linfct = mcp(Month = "Tukey")))
@

\chapter{Model Diagnostics}

\section{Transformation Plots}

The model formulation~(\ref{model}) suggests a simple graphical check of the
main model assumption, that is, the existence of a constant shift on a latent
scale defined by $F$. For one block and two samples, the model reads
\begin{eqnarray*} 
F_Y(y \mid  \rT = 1) & = & F\left(F^{-1}(F_Y(y \mid \rT = 1))\right) = F(h(y)) \\
F_Y(y \mid  \rT = 2) & = & F\left(F^{-1}(F_Y(y \mid \rT = 1)) - 
                                  \delta_2\right) = F(h(y) - \delta_2).
\end{eqnarray*}
We can now contrast the conditional distributions obtained from this model
with the marginally estimated distribution functions of $Y$, that is,
nonparametric estimates for the two distribution functions obtained within
each treatment group separately. These latter estimates are typically simply the ECDF or
Kaplan-Meier estimators in the presence of right-censoring. It is easier to
see deviations from model~(\ref{model}) when the plot is presented on the
scale of the link function $F^{-1}$. If the control distribution is close to
$\hat{h}(y)$ and the distribution of those treated close to $\hat{h}(y) -
\hat{\delta}_2$, model~(\ref{model}) provides a good approximation. If the
two curves cross or if their horizontal distance varies considerably across
the sample space, we should be concerned.

The model is assumed to hold in each block with overall treatment effects
$\delta_k$, but the intercept function $h$ may differ between blocks.
Therefore, we produce such a plot for each block separately. We also do not
pay attention to the original observations of the outcome but plot the model
on the scale of the ranked outcomes (the model is invariant with respect to
monotone and therefore rank transformations).

All the necessary information can be extracted from an \code{object} of class
\code{free1way}. We extract the sub-table containing the data for
\code{block}, paying attention to possible right-censoring (in the four
dimension)

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap63}\raggedright\small
\NWtarget{nuweb81}{} $\langle\,${\itshape extract plot data}\nobreak\ {\footnotesize {81}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@object <- x@\\
\mbox{}\verb@x <- object$table@\\
\mbox{}\verb@if (RC <- (length(dim(x)) == 4L)) {@\\
\mbox{}\verb@    x <- x[,,block,,drop = FALSE]@\\
\mbox{}\verb@    x <- x[marginSums(x, margin = 1) > 0,,,,drop = FALSE]@\\
\mbox{}\verb@} else {@\\
\mbox{}\verb@    x <- x[,,block,drop = FALSE]@\\
\mbox{}\verb@    x <- x[marginSums(x, margin = 1) > 0,,,drop = FALSE]@\\
\mbox{}\verb@}@\\
\mbox{}\verb@K <- dim(x)[2L]@\\
\mbox{}\verb@ret0 <- matrix(NA, nrow = dim(x)[1L], ncol = K)@\\
\mbox{}\verb@ln <- object$link@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb83d}{83d}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
We then refit the intercept parameters (that is, $\hat{h}(y)$) for this
block re-using the treatment effects already contained in \code{object}. We
will plot them later on as a means to directly compare the model to the data

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap64}\raggedright\small
\NWtarget{nuweb82a}{} $\langle\,${\itshape refit block intercepts}\nobreak\ {\footnotesize {82a}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@### refit for this block only@\\
\mbox{}\verb@m1 <- .free1wayML(x, link = ln, start = coef(object), @\\
\mbox{}\verb@                  fix = seq_along(coef(object)),@\\
\mbox{}\verb@                  residuals = FALSE, hessian = FALSE)@\\
\mbox{}\verb@intercepts <- m1$intercepts[[1L]]@\\
\mbox{}\verb@j1 <- which(attr(get("xlist", environment(m1$profile))[[1L]], "idx") > 1)@\\
\mbox{}\verb@j1 <- j1[-length(j1)]@\\
\mbox{}\verb@cf <- c(0, coef(object))@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb83d}{83d}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
Last, we compute the marginal distributions, that is, the distribution
function of the outcome separately for each group. We could have used
\code{ecdf} or \code{survfit} here, but since we have everything available
in \code{.free1wayML}, we simply remove the observations corresponding to
other groups and refit the intercept parameters.

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap65}\raggedright\small
\NWtarget{nuweb82b}{} $\langle\,${\itshape marginal fit}\nobreak\ {\footnotesize {82b}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@for (k in seq_len(K)) {@\\
\mbox{}\verb@    y <- x@\\
\mbox{}\verb@    if (RC) {@\\
\mbox{}\verb@        y[,-k,1,] <- 0@\\
\mbox{}\verb@    } else {@\\
\mbox{}\verb@        y[,-k,1] <- 0@\\
\mbox{}\verb@    }@\\
\mbox{}\verb@    start <- numeric(K - 1)@\\
\mbox{}\verb@    m0 <- .free1wayML(y, link = ln, start = start, @\\
\mbox{}\verb@                      fix = seq_len(K - 1), residuals = FALSE, @\\
\mbox{}\verb@                      hessian = FALSE)@\\
\mbox{}\verb@    j <- which(attr(get("xlist", environment(m0$profile))[[1L]], "idx") > 1)@\\
\mbox{}\verb@    ret0[j[-length(j)],k] <- m0$intercepts[[1L]]@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb83d}{83d}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
We can now setup a \code{plot} method for \code{free1way} objects, we begin
with an empty plot with appropriate axes annotations (we allow plotting on
the scale of the CDF as well):

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap66}\raggedright\small
\NWtarget{nuweb82c}{} $\langle\,${\itshape setup canvas}\nobreak\ {\footnotesize {82c}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@if (cdf) {@\\
\mbox{}\verb@    ylim <- c(0, 1)@\\
\mbox{}\verb@    FUN <- function(x) ln$linkinv(x)@\\
\mbox{}\verb@} else {@\\
\mbox{}\verb@    ylim <- range(c(ret0, intercepts), na.rm = TRUE)@\\
\mbox{}\verb@    FUN <- function(x) x@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@\\
\mbox{}\verb@idx <- seq_len(nrow(x))@\\
\mbox{}\verb@main <- list(...)$main@\\
\mbox{}\verb@if (is.null(main) && dim(object$table)[3L] > 1L)@\\
\mbox{}\verb@    main <- paste(names(dimnames(x))[3L], dimnames(x)[[3L]][1L], sep = "=")@\\
\mbox{}\verb@plot(idx, rep(0, length(idx)), type = "n", ylim = ylim, @\\
\mbox{}\verb@     xlab = paste("Rank(", names(dimnames(x))[1L], ")", sep = ""),@\\
\mbox{}\verb@     ylab = ifelse(cdf, "Probability", paste(ln$name, "Link")), @\\
\mbox{}\verb@     main = main, ...)@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb83d}{83d}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
The marginally estimated functions are plotted first

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap67}\raggedright\small
\NWtarget{nuweb83a}{} $\langle\,${\itshape marginal plot}\nobreak\ {\footnotesize {83a}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@out <- sapply(seq_len(K), function(k) @\\
\mbox{}\verb@    lines(which(!is.na(ret0[,k])), FUN(ret0[!is.na(ret0[,k]),k]), @\\
\mbox{}\verb@          type = "s", col = col[k], lty = lty[1]))@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb83d}{83d}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
followed by a plot of the model-based functions (which can be switched off)

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap68}\raggedright\small
\NWtarget{nuweb83b}{} $\langle\,${\itshape model plot}\nobreak\ {\footnotesize {83b}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@if (model)@\\
\mbox{}\verb@    out <- sapply(seq_len(K), function(k) @\\
\mbox{}\verb@        lines(j1, FUN(intercepts - cf[k]), type = "s", col = col[k], @\\
\mbox{}\verb@              lty = lty[2]))@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb83d}{83d}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
and finally we add a legend

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap69}\raggedright\small
\NWtarget{nuweb83c}{} $\langle\,${\itshape add legend}\nobreak\ {\footnotesize {83c}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@if (legend) {@\\
\mbox{}\verb@        legend("topleft", lty = lty[1], col = col, @\\
\mbox{}\verb@               legend = paste(names(dimnames(x))[2L], dimnames(x)[[2L]]),@\\
\mbox{}\verb@               title = "Nonparametric", bty = "n")@\\
\mbox{}\verb@        if (model) @\\
\mbox{}\verb@            legend("bottomright", lty = lty[2], col = col, @\\
\mbox{}\verb@                   legend = paste(names(dimnames(x))[2L], dimnames(x)[[2L]]),@\\
\mbox{}\verb@                   title = "Semiparametric", bty = "n")@\\
\mbox{}\verb@    }@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb83d}{83d}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
We put everything together in a \code{plot} method

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap70}\raggedright\small
\NWtarget{nuweb83d}{} $\langle\,${\itshape plot free1way}\nobreak\ {\footnotesize {83d}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@plot.free1way <- function(x, ..., block = 1L, cdf = FALSE, model = TRUE,@\\
\mbox{}\verb@                          col = seq_len(length(coef(object)) + 1L),@\\
\mbox{}\verb@                          lty = 1:2, legend = TRUE) @\\
\mbox{}\verb@{@\\
\mbox{}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape extract plot data}\nobreak\ {\footnotesize \NWlink{nuweb81}{81}}$\,\rangle$}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape refit block intercepts}\nobreak\ {\footnotesize \NWlink{nuweb82a}{82a}}$\,\rangle$}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape marginal fit}\nobreak\ {\footnotesize \NWlink{nuweb82b}{82b}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape setup canvas}\nobreak\ {\footnotesize \NWlink{nuweb82c}{82c}}$\,\rangle$}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape marginal plot}\nobreak\ {\footnotesize \NWlink{nuweb83a}{83a}}$\,\rangle$}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape model plot}\nobreak\ {\footnotesize \NWlink{nuweb83b}{83b}}$\,\rangle$}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape add legend}\nobreak\ {\footnotesize \NWlink{nuweb83c}{83c}}$\,\rangle$}\verb@@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb25b}{25b}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
By default, the plot shows the marginal (``nonparametric'') and model-based
(``semiparametric'') estimates in the sample plot. For the ozone
concentrations in different months in Figure~\ref{fig:ozone}, we see that the distributions differ,
but the effects can be well understood as shifts on a log-odds scale. Note
that the solid nonparametric curves agree quite well with the dashed
model-based ones.

\begin{figure}
<<plot-ex, fig = TRUE>>=
tk <- free1way(Ozone ~ Month, data = airquality)
plot(tk, las = 1)
@
\caption{Model diagnostics for proportional odds model comparing ozone
concentrations for different months. \label{fig:ozone}}
\end{figure}

We can check if the plot is correct by comparing the result
(Figure~\ref{fig:ozonecdf}) to the one
obtained with \code{ecdfplot} after rank transformation on the scale of the
distribution functions (Figure~\ref{fig:ozoneecdf})

\begin{figure}
<<plot-ex-cdf, fig = TRUE>>=
plot(tk, cdf = TRUE, model = FALSE, las = 1)
@
\caption{Nonparametric distributions of ozone concentrations for different months. \label{fig:ozonecdf}}
\end{figure}

\begin{figure}
<<plot-ex-ecdf, fig = TRUE>>=
aq <- subset(airquality, !is.na(Ozone))
aq$r <- match(aq$Ozone, sort(unique(aq$Ozone)))
library("latticeExtra")
plot(ecdfplot(~ r, data = aq, groups = Month, col = 1:5))
@
\caption{Nonparametric distributions of ozone concentrations for different months.
\label{fig:ozoneecdf}}
\end{figure}


\section{Probability-probability Plots}

The classical shift model $F_Y(y \mid T = 2) = F_Y(y - \mu \mid T = 1)$
can be criticised using confidence bands for QQ-plots in \code{qqplot},
because the parameter $\mu$ shows up as a vertical shift of the diagonal
if the model is appropriate.

Likewise, model~(\ref{model}) can be graphically assessed using the P-P-plot.
We concentrate on the two-sample case. The shift parameter $\delta_2$ gives
rise to the model-based P-P graph $(p, F(F^{-1}(p) - \delta_2))$ and a
confidence \emph{band} can be obtained from a confidence \emph{interval} for
$\delta_2$. The P-P-plot is, up to rescalings, identical to the ROC curve.

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap71}\raggedright\small
\NWtarget{nuweb87}{} $\langle\,${\itshape ROC bands}\nobreak\ {\footnotesize {87}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@ if (!is.null(conf.level)) {@\\
\mbox{}\verb@    prb <- seq_len(1000) / 1001@\\
\mbox{}\verb@    res <- c(x, y)@\\
\mbox{}\verb@    grp <- gl(2, 1, labels = c(xlab, ylab))@\\
\mbox{}\verb@    grp <- grp[rep(1:2, c(length(x), length(y)))]@\\
\mbox{}\verb@    args <- conf.args@\\
\mbox{}\verb@    args$y <- res@\\
\mbox{}\verb@    args$groups <- grp@\\
\mbox{}\verb@    args$border <- args$col <- args$type <- NULL@\\
\mbox{}\verb@    f1w <- do.call("free1way", args)@\\
\mbox{}\verb@@\\
\mbox{}\verb@    ci <- confint(f1w, level = conf.level, type = args$type)@\\
\mbox{}\verb@    lwr <- .p(f1w$link, .q(f1w$link, prb) - ci[1,1])@\\
\mbox{}\verb@    upr <- .p(f1w$link, .q(f1w$link, prb) - ci[1,2])@\\
\mbox{}\verb@    x <- c(prb, rev(prb))@\\
\mbox{}\verb@    y <- c(lwr, rev(upr))@\\
\mbox{}\verb@    xn <- c(x[1L], rep(x[-1L], each = 2))@\\
\mbox{}\verb@    yn <- c(rep(y[-length(y)], each = 2), y[length(y)])@\\
\mbox{}\verb@    polygon(x = xn, y = yn, col = conf.args$col, border = conf.args$border)@\\
\mbox{}\verb@    lines(prb, .p(f1w$link, .q(f1w$link, prb) - coef(f1w)))@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb88}{88}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
We introduce a new function \code{ppplot}, closely following the
implementation of \code{qqplot}, allowing to plot the empirical 
\citep{WilkGnanadesikan1968} and
corresponding model-based \citep{SewakHothorn2023} probability-probability plot, 
the latter for a certain choice of link function:

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap72}\raggedright\small
\NWtarget{nuweb88}{} $\langle\,${\itshape ppplot}\nobreak\ {\footnotesize {88}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@ppplot <- function(x, y, plot.it = TRUE,@\\
\mbox{}\verb@                   xlab = paste("Cumulative probabilities for", @\\
\mbox{}\verb@                                deparse1(substitute(x))),@\\
\mbox{}\verb@                   ylab = paste("Cumulative probabilities for", @\\
\mbox{}\verb@                                deparse1(substitute(y))), @\\
\mbox{}\verb@                   main = "P-P plot",@\\
\mbox{}\verb@                   ..., conf.level = NULL, @\\
\mbox{}\verb@                   conf.args = list(link = "logit", type = "Wald", @\\
\mbox{}\verb@                                    col = NA, border = NULL)) @\\
\mbox{}\verb@{@\\
\mbox{}\verb@@\\
\mbox{}\verb@    force(xlab)@\\
\mbox{}\verb@    force(ylab)@\\
\mbox{}\verb@    if (xlab == ylab) {@\\
\mbox{}\verb@        xlab <- paste0("x = ", xlab)@\\
\mbox{}\verb@        ylab <- paste0("y = ", ylab)@\\
\mbox{}\verb@    }@\\
\mbox{}\verb@@\\
\mbox{}\verb@    ex <- ecdf(x)@\\
\mbox{}\verb@    sy <- sort(unique(c(x, y)))@\\
\mbox{}\verb@    py <- ecdf(y)(sy)@\\
\mbox{}\verb@    px <- ex(sy)@\\
\mbox{}\verb@    ret <- stepfun(px, c(0, py))@\\
\mbox{}\verb@    if (!plot.it)@\\
\mbox{}\verb@        return(ret)@\\
\mbox{}\verb@@\\
\mbox{}\verb@    plot(ret, xlim = c(0, 1), ylim = c(0, 1), @\\
\mbox{}\verb@         xlab = xlab, ylab = ylab, main = main, @\\
\mbox{}\verb@         verticals = FALSE, ...)@\\
\mbox{}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape ROC bands}\nobreak\ {\footnotesize \NWlink{nuweb87}{87}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\\
\mbox{}\verb@    plot(ret, add = TRUE, verticals = FALSE, ...)@\\
\mbox{}\verb@@\\
\mbox{}\verb@    return(invisible(ret)) @\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb25b}{25b}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
A correct logistic model with log-odds ratio three is shown in Figure~\ref{fig:PO}
and an incorrect proportional hazards model for the same data in
Figure~\ref{fig:PH}.

\begin{figure}
<<ppplot, fig = TRUE>>=
y <- rlogis(50)
x <- rlogis(50, location = 3)
ppplot(y, x, conf.level = .95, las = 1)
@
\caption{Data sampled from a proportional-odds model with
probability-probability (P-P)  curve and $95\%$ confidence band obtained from
a proportional-odds model. \label{fig:PO}}
\end{figure}


\begin{figure}
<<ppplot-savage, fig = TRUE>>=
ppplot(y, x, conf.args = list(link = "cloglog", type = "Wald", 
                              col = NA, border = NULL),
       conf.level = .95, las = 1)
@
\caption{Data sampled from a proportional-odds model with
probability-probability (P-P)  curve and $95\%$ confidence band obtained from
a proportional-hazards model. \label{fig:PH}}
\end{figure}


\chapter{Random Number Generation} \label{ch:rng}

With~\ref{model} we know that for an absolutely continuous random variable
$Y$
\begin{eqnarray*}
U = F_Y(Y \mid  \rT = k, \rS = b) = F\left(F^{-1}\left(F_Y(Y \mid \rT = 1, \rS = b)\right) - \delta_k\right), \quad k = 2, \dots, K
\end{eqnarray*}
follows a standard uniform distribution on the unit interval. This means
that we can sample from the distribution of $Y$ using
\begin{eqnarray*}
F_Y^{-1}\left(F(F^{-1}(U) + \delta_k) \mid \rT = 1, \rS = b)\right).
\end{eqnarray*}
It is therefore enough to draw samples from $F(F^{-1}(U) + \delta_k)$, that
is, assuming a uniform distribution for $F_Y$ in each control group. Because
of the invariance with respect to monotone transformations, transforming all
observations by the same quantile function changes the outcome distributions
but not the shift effects. Discrete outcomes can be generated
by post-hoc categorisation.

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap73}\raggedright\small
\NWtarget{nuweb91}{} $\langle\,${\itshape design args}\nobreak\ {\footnotesize {91}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@K <- length(delta) + 1L@\\
\mbox{}\verb@if (is.null(names(delta))) @\\
\mbox{}\verb@    names(delta) <- LETTERS[seq_len(K)[-1]]@\\
\mbox{}\verb@if (length(alloc_ratio) == 1L) @\\
\mbox{}\verb@    alloc_ratio <- rep_len(alloc_ratio, K - 1)@\\
\mbox{}\verb@if (length(alloc_ratio) != K - 1L)@\\
\mbox{}\verb@    stop(gettextf("invalid argument '%s'", "alloc_ratio"), domain = NA)@\\
\mbox{}\verb@if (length(strata_ratio) == 1L) @\\
\mbox{}\verb@    strata_ratio <- rep_len(strata_ratio, B - 1)@\\
\mbox{}\verb@if (length(strata_ratio) != B - 1L)@\\
\mbox{}\verb@    stop(gettextf("invalid argument '%s'", "strata_ratio"), domain = NA)@\\
\mbox{}\verb@### sample size per group (columns) and stratum (rows)@\\
\mbox{}\verb@N <- n * matrix(c(1, alloc_ratio), nrow = B, ncol = K, byrow = TRUE) * @\\
\mbox{}\verb@         matrix(c(1, strata_ratio), nrow = B, ncol = K)@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb92}{92}\NWlink{nuweb97a}{, 97a}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap74}\raggedright\small
\NWtarget{nuweb92}{} $\langle\,${\itshape rfree1way}\nobreak\ {\footnotesize {92}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@.rfree1way <- function(n, delta = 0, link = c("logit", "probit", @\\
\mbox{}\verb@                                              "cloglog", "loglog")) @\\
\mbox{}\verb@{@\\
\mbox{}\verb@@\\
\mbox{}\verb@    logU <- log(ret <- runif(n))@\\
\mbox{}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape link2fun}\nobreak\ {\footnotesize \NWlink{nuweb45}{45}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\\
\mbox{}\verb@    trt <- (abs(delta) > 0)@\\
\mbox{}\verb@    ret[trt] <- .p(link, .q(link, logU[trt], log.p = TRUE) + delta[trt])@\\
\mbox{}\verb@@\\
\mbox{}\verb@    return(ret)@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@\\
\mbox{}\verb@rfree1way <- function(n, prob = NULL, alloc_ratio = 1, @\\
\mbox{}\verb@                      blocks = ifelse(is.null(prob), 1, NCOL(prob)), @\\
\mbox{}\verb@                      strata_ratio = 1, delta = 0, offset = 0, @\\
\mbox{}\verb@                      link = c("logit", "probit", "cloglog", "loglog"))@\\
\mbox{}\verb@{@\\
\mbox{}\verb@@\\
\mbox{}\verb@    B <- blocks@\\
\mbox{}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape design args}\nobreak\ {\footnotesize \NWlink{nuweb91}{91}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\\
\mbox{}\verb@    rownames(N) <- paste0("block", seq_len(B))@\\
\mbox{}\verb@    ctrl <- "Control"@\\
\mbox{}\verb@    colnames(N) <- c(ctrl, names(delta))@\\
\mbox{}\verb@@\\
\mbox{}\verb@    if (length(offset) != K)@\\
\mbox{}\verb@        offset <- rep_len(offset, K)@\\
\mbox{}\verb@@\\
\mbox{}\verb@    trt <- gl(K, 1, labels = colnames(N))@\\
\mbox{}\verb@    blk <- gl(B, 1, labels = rownames(N))@\\
\mbox{}\verb@    ret <- expand.grid(groups = trt, blocks = blk)@\\
\mbox{}\verb@    if (B == 1L) ret$blocks <- NULL@\\
\mbox{}\verb@    ret <- ret[rep(seq_len(nrow(ret)), times = N), , drop = FALSE]@\\
\mbox{}\verb@    ret$y <- .rfree1way(nrow(ret), @\\
\mbox{}\verb@                        delta = offset[ret$groups] + c(0, delta)[ret$groups], @\\
\mbox{}\verb@                        link = link)@\\
\mbox{}\verb@    if (is.null(prob)) return(ret)@\\
\mbox{}\verb@@\\
\mbox{}\verb@    ### return discrete distribution@\\
\mbox{}\verb@    if (!is.matrix(prob))@\\
\mbox{}\verb@        prob <- matrix(prob, nrow = NROW(prob), ncol = B)@\\
\mbox{}\verb@    if (ncol(prob) != B)@\\
\mbox{}\verb@        stop(gettextf("incorrect number of columns for 'prob' in %s",@\\
\mbox{}\verb@                      "rfree1way"),@\\
\mbox{}\verb@             domain = NA)@\\
\mbox{}\verb@    prob <- prop.table(prob, margin = 2L)@\\
\mbox{}\verb@    ret <- do.call("rbind", lapply(1:ncol(prob), function(b) {@\\
\mbox{}\verb@        if (B > 1)@\\
\mbox{}\verb@            ret <- subset(ret, blocks == levels(blocks)[b])@\\
\mbox{}\verb@        ret$y <- cut(ret$y, breaks = c(-Inf, cumsum(prob[,b])), @\\
\mbox{}\verb@                     labels = paste0("Y", 1:nrow(prob)), ordered_result = TRUE)@\\
\mbox{}\verb@        ret@\\
\mbox{}\verb@    }))@\\
\mbox{}\verb@    return(ret)@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb25b}{25b}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
<<rfree1way>>=
(logOR <- c(log(1.5), log(2)))
nd <- rfree1way(150, delta = logOR)
coef(ft <- free1way(y ~ groups, data = nd))
sqrt(diag(vcov(ft)))
logLik(ft)
nd$y <- qchisq(nd$y, df = 3)
coef(ft <- free1way(y ~ groups, data = nd))
sqrt(diag(vcov(ft)))
logLik(ft)
N <- 25
pvals <- replicate(Nsim, 
{
  nd <- rfree1way(n = N, blocks = 2, delta = c(.25, .5), alloc_ratio = 2)
  summary(free1way(y ~ groups | blocks, data = nd), test = "Permutation")$p.value
})

power.free1way.test(n = N, blocks = 2, delta = c(.25, .5), alloc_ratio = 2)
mean(pvals < .05)
@

This function can also be used to simulate survival times, for example, from
a proportional hazards model with a censoring probability of $.25$ in the
control arm and of $.5$ in the treated arm, under random censoring (that is,
event and censoring times are independent given treatment).

<<rfree1waysurv>>=
N <- 1000
nd <- rfree1way(N, delta = 1, link = "cloglog")
nd$C <- rfree1way(n = N, delta = 1, offset = -c(qlogis(.25), qlogis(.5)), 
                  link = "cloglog")$y
nd$y <- Surv(pmin(nd$y, nd$C), nd$y < nd$C)
### check censoring probability
1 - tapply(nd$y[,2], nd$groups, mean)
summary(free1way(y ~ groups, data = nd, link = "cloglog"))
summary(coxph(y ~ groups, data = nd))
@

Next we start implementing a function for simulating $C \times K$ tables. We need
to specify the number of observations in each treatment group (\code{c}),
the discrete distribution of the control (\code{r}), a model (\code{link}), and a
treatment effect (\code{delta}, in line with \code{power.XYZ.test}). In
essence, we draw samples from the multinomial distribution after computing
the relevant discrete density.

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap75}\raggedright\small
\NWtarget{nuweb95}{} $\langle\,${\itshape r2dsim}\nobreak\ {\footnotesize {95}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@.r2dsim <- function(n, r, c, delta = 0,@\\
\mbox{}\verb@                   link = c("logit", "probit", "cloglog", "loglog")) @\\
\mbox{}\verb@{@\\
\mbox{}\verb@@\\
\mbox{}\verb@    if (length(n <- as.integer(n)) == 0L || (n < 0) || is.na(n)) @\\
\mbox{}\verb@        stop(gettextf("invalid argument '%s'", "n"), domain = NA)@\\
\mbox{}\verb@    colsums <- c@\\
\mbox{}\verb@    if (length(colsums[] <- as.integer(c)) <= 1L || @\\
\mbox{}\verb@        any(colsums < 0) || anyNA(colsums)) @\\
\mbox{}\verb@        stop(gettextf("invalid argument '%s'", "c"), domain = NA)@\\
\mbox{}\verb@@\\
\mbox{}\verb@    prob <- r@\\
\mbox{}\verb@    if (length(prob[] <- as.double(r / sum(r))) <= 1L || @\\
\mbox{}\verb@        any(prob < 0) || anyNA(prob)) @\\
\mbox{}\verb@        stop(gettextf("invalid argument '%s'", "r"), domain = NA)@\\
\mbox{}\verb@@\\
\mbox{}\verb@    if (is.null(names(prob))) @\\
\mbox{}\verb@        names(prob) <- paste0("i", seq_along(prob))@\\
\mbox{}\verb@    @\\
\mbox{}\verb@    K <- length(colsums)@\\
\mbox{}\verb@    if (is.null(names(colsums))) @\\
\mbox{}\verb@        names(colsums) <- LETTERS[seq_len(K)]@\\
\mbox{}\verb@    delta <- rep_len(delta, K - 1L)@\\
\mbox{}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape link2fun}\nobreak\ {\footnotesize \NWlink{nuweb45}{45}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\\
\mbox{}\verb@    p0 <- cumsum(prob)@\\
\mbox{}\verb@    h0 <- .q(link, p0[-length(p0)]) ### last element of p0 is one@\\
\mbox{}\verb@@\\
\mbox{}\verb@    h1 <- h0 - matrix(delta, nrow = length(prob) - 1L, ncol = K - 1, @\\
\mbox{}\verb@                      byrow = TRUE)@\\
\mbox{}\verb@    p1 <- rbind(.p(link, h1), 1)@\\
\mbox{}\verb@    p <- cbind(p0, p1)@\\
\mbox{}\verb@    ret <- vector(mode = "list", length = n)@\\
\mbox{}\verb@@\\
\mbox{}\verb@    for (i in seq_len(n)) {@\\
\mbox{}\verb@        tab <- sapply(seq_len(K), function(k)@\\
\mbox{}\verb@            unclass(table(cut(runif(colsums[k]), breaks = c(-Inf, p[,k])))))@\\
\mbox{}\verb@        ret[[i]] <- as.table(array(unlist(tab), dim = c(length(prob), K), @\\
\mbox{}\verb@                          dimnames = list(names(prob), @\\
\mbox{}\verb@                                          names(colsums))))@\\
\mbox{}\verb@    }@\\
\mbox{}\verb@    return(ret)@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb100}{100}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
\chapter{Power and Sample Size}

The term ``distribution-free'' refers to the invariance of the reference
distribution with respect to the distribution of an absolutely continuous
outcome under control. Unfortunately, this is no longer true for
non-continuous outcomes (due to ties) and under the alternative. That means
that sample size assessments always take place under certain assumptions
regarding the outcome distribution.

With the infrastructure from Chapter~\ref{ch:rng}, 
we are now ready to put together a function for power evaluation and sample
size assessment. The core idea is to draw samples from the relevant data
(under a specific model in the alternative) and to estimate the Fisher
information of the treatment effect parameters for this configuration. The
power of the global Wald test can than be approximated by a non-central
$\chi^2$ distribution. This is much faster than approximating the power
directly. Nevertheless, this is a random experiment, so we first make
computations reproducible:

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap76}\raggedright\small
\NWtarget{nuweb96}{} $\langle\,${\itshape random seed}\nobreak\ {\footnotesize {96}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@if (!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE)) @\\
\mbox{}\verb@    runif(1)@\\
\mbox{}\verb@if (is.null(seed)) @\\
\mbox{}\verb@    seed <- RNGstate <- get(".Random.seed", envir = .GlobalEnv)@\\
\mbox{}\verb@else {@\\
\mbox{}\verb@    R.seed <- get(".Random.seed", envir = .GlobalEnv)@\\
\mbox{}\verb@    set.seed(seed)@\\
\mbox{}\verb@    RNGstate <- structure(seed, kind = as.list(RNGkind()))@\\
\mbox{}\verb@    on.exit(assign(".Random.seed", R.seed, envir = .GlobalEnv))@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb100}{100}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap77}\raggedright\small
\NWtarget{nuweb97a}{} $\langle\,${\itshape power setup}\nobreak\ {\footnotesize {97a}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@@\\
\mbox{}\verb@@\hbox{$\langle\,${\itshape link2fun}\nobreak\ {\footnotesize \NWlink{nuweb45}{45}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\\
\mbox{}\verb@### matrix means control distributions in different strata@\\
\mbox{}\verb@if (!is.matrix(prob))@\\
\mbox{}\verb@    prob <- matrix(prob, nrow = NROW(prob), ncol = blocks)@\\
\mbox{}\verb@prob <- prop.table(prob, margin = 2L)@\\
\mbox{}\verb@C <- nrow(prob)@\\
\mbox{}\verb@B <- ncol(prob)@\\
\mbox{}\verb@if (is.null(colnames(prob))) @\\
\mbox{}\verb@    colnames(prob) <- paste0("stratum", seq_len(B))@\\
\mbox{}\verb@p0 <- apply(prob, 2, cumsum)@\\
\mbox{}\verb@h0 <- .q(link, p0[-nrow(p0),,drop = FALSE])@\\
\mbox{}\verb@@\\
\mbox{}\verb@@\hbox{$\langle\,${\itshape design args}\nobreak\ {\footnotesize \NWlink{nuweb91}{91}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\\
\mbox{}\verb@rownames(N) <- colnames(prob)@\\
\mbox{}\verb@ctrl <- "Control"@\\
\mbox{}\verb@dn <- dimnames(prob)@\\
\mbox{}\verb@if (!is.null(names(dn)[1L]))@\\
\mbox{}\verb@    ctrl <- names(dn)[1L]@\\
\mbox{}\verb@colnames(N) <- c(ctrl, names(delta))@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb100}{100}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
For estimating the Fisher information, we draw samples from the discrete
outcome distribution and evaluate the observed Fisher information for the,
here and now known true parameters. The average of these Fisher information
matrices is then used as an estimate for the expected Fisher information.
For small sample sizes less than $100$, we draw larger samples (at least
$1000$) and adjust the obtained Fisher information accordingly to reduce
sampling error.

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap78}\raggedright\small
\NWtarget{nuweb97b}{} $\langle\,${\itshape estimate Fisher information}\nobreak\ {\footnotesize {97b}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@he <- 0@\\
\mbox{}\verb@deltamu <- delta - mu@\\
\mbox{}\verb@Nboost <- ifelse(n < 100, ceiling(1000 / n), 1)@\\
\mbox{}\verb@for (i in seq_len(nsim)) {@\\
\mbox{}\verb@    parm <- deltamu@\\
\mbox{}\verb@    x <- as.table(array(0, dim = c(C, K, B)))@\\
\mbox{}\verb@    for (b in seq_len(B)) {@\\
\mbox{}\verb@        x[,,b] <- .r2dsim(1L, r = prob[, b], c = Nboost * N[b,], @\\
\mbox{}\verb@                          delta = delta, link = link)[[1L]]@\\
\mbox{}\verb@        rs <- which(.rowSums(x[,,b], m = dim(x)[1L], n = dim(x)[2L]) > 0)@\\
\mbox{}\verb@        theta <- h0[pmin(nrow(h0), rs), b]@\\
\mbox{}\verb@        parm <- c(parm, theta[-length(theta)])@\\
\mbox{}\verb@    }@\\
\mbox{}\verb@    ### evaluate observed hessian for true parameters parm and x data@\\
\mbox{}\verb@    he <- he + .free1wayML(x, link = link, mu = mu, start = parm, @\\
\mbox{}\verb@                           fix = seq_along(parm))$hessian / Nboost@\\
\mbox{}\verb@}@\\
\mbox{}\verb@### estimate expected Fisher information@\\
\mbox{}\verb@he <- he / nsim@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb100}{100}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
The power function now depends on sample size (\code{n}; the number of
control observations in the first stratum), a discrete control distribution
(\code{prob}, this can be a $C \times B$ matrix for stratum-specific control
distributions), a vector of allocation ratios (\code{alloc_ratio = 2} means
control:treatment = 1:2) and the sample size ratios between strata.

The treatment effects are contained in $K - 1$ vector \code{delta}:

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap79}\raggedright\small
\NWtarget{nuweb98a}{} $\langle\,${\itshape power call}\nobreak\ {\footnotesize {98a}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@power.free1way.test(n = n, prob = prob, @\\
\mbox{}\verb@                    alloc_ratio = alloc_ratio,  @\\
\mbox{}\verb@                    blocks = blocks,@\\
\mbox{}\verb@                    strata_ratio = strata_ratio, @\\
\mbox{}\verb@                    delta = delta, mu = mu,@\\
\mbox{}\verb@                    sig.level = sig.level, link = link, @\\
\mbox{}\verb@                    alternative = alternative, @\\
\mbox{}\verb@                    nsim = nsim, seed = seed, @\\
\mbox{}\verb@                    tol = tol)$power - power@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb99}{99}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap80}\raggedright\small
\NWtarget{nuweb98b}{} $\langle\,${\itshape power args check}\nobreak\ {\footnotesize {98b}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@if (sum(vapply(list(n, delta, power, sig.level), is.null, @\\
\mbox{}\verb@    NA)) != 1) @\\
\mbox{}\verb@    stop("exactly one of 'n', 'delta', 'power', and 'sig.level' must be NULL")@\\
\mbox{}\verb@assert_NULL_or_prob(sig.level)@\\
\mbox{}\verb@assert_NULL_or_prob(power)@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb100}{100}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap81}\raggedright\small
\NWtarget{nuweb98c}{} $\langle\,${\itshape power htest output}\nobreak\ {\footnotesize {98c}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@ss <- paste(colSums(N), paste0("(", colnames(N), ")"), collapse = " + ")@\\
\mbox{}\verb@ret <- list(n = n, @\\
\mbox{}\verb@            "Total sample size" = paste(ss, "=", sum(N)),@\\
\mbox{}\verb@            power = power, @\\
\mbox{}\verb@            sig.level = sig.level)@\\
\mbox{}\verb@if (mu != 0) ret$mu <- mu@\\
\mbox{}\verb@if (K == 2L) ret[["Standard error"]] <- se@\\
\mbox{}\verb@ret[[link$parm]] <- delta@\\
\mbox{}\verb@ret$note <- "'n' is sample size in control group"@\\
\mbox{}\verb@if (B > 1) ret$note <- paste(ret$note, "of first stratum")@\\
\mbox{}\verb@alias <- link$alias@\\
\mbox{}\verb@if (length(link$alias) == 2L) alias <- alias[1L + (K > 2L)]@\\
\mbox{}\verb@ret$method <- paste(ifelse(B > 1L, "Stratified", ""), @\\
\mbox{}\verb@                    paste0(K, "-sample"), alias, @\\
\mbox{}\verb@                    "test against", link$model, "alternatives")@\\
\mbox{}\verb@class(ret) <- "power.htest"@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb100}{100}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
We can invert the power function for finding nominal levels, sample or effect sizes
necessary to achieve a certain power. The option is available because the power approximation is
relatively fast.

\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap82}\raggedright\small
\NWtarget{nuweb99}{} $\langle\,${\itshape power inversion}\nobreak\ {\footnotesize {99}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@if (is.null(n)) @\\
\mbox{}\verb@    n <- ceiling(uniroot(function(n) {@\\
\mbox{}\verb@             @\hbox{$\langle\,${\itshape power call}\nobreak\ {\footnotesize \NWlink{nuweb98a}{98a}}$\,\rangle$}\verb@@\\
\mbox{}\verb@         }, interval = c(5, 1e+03), tol = tol, extendInt = "upX")$root)@\\
\mbox{}\verb@else if (is.null(delta)) {@\\
\mbox{}\verb@    ### 2-sample only@\\
\mbox{}\verb@    if (length(alloc_ratio) > 1L)@\\
\mbox{}\verb@        stop(gettextf("effect size can only be computed for two-sample problems in %s",@\\
\mbox{}\verb@                      "power.free1way.test"),@\\
\mbox{}\verb@             domain = NA)       @\\
\mbox{}\verb@    delta <- uniroot(function(delta) {@\\
\mbox{}\verb@             @\hbox{$\langle\,${\itshape power call}\nobreak\ {\footnotesize \NWlink{nuweb98a}{98a}}$\,\rangle$}\verb@@\\
\mbox{}\verb@### <TH> interval depending on alternative, symmetry? </TH>@\\
\mbox{}\verb@        }, interval = c(0, 10), tol = tol, extendInt = "upX")$root@\\
\mbox{}\verb@    }@\\
\mbox{}\verb@else if (is.null(sig.level)) @\\
\mbox{}\verb@    sig.level <- uniroot(function(sig.level) {@\\
\mbox{}\verb@            @\hbox{$\langle\,${\itshape power call}\nobreak\ {\footnotesize \NWlink{nuweb98a}{98a}}$\,\rangle$}\verb@@\\
\mbox{}\verb@       }, interval = c(1e-10, 1 - 1e-10), tol = tol, extendInt = "yes")$root@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb100}{100}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
\begin{flushleft} \small
\begin{minipage}{\linewidth}\label{scrap83}\raggedright\small
\NWtarget{nuweb100}{} $\langle\,${\itshape power}\nobreak\ {\footnotesize {100}}$\,\rangle\equiv$
\vspace{-1ex}
\begin{list}{}{} \item
\mbox{}\verb@@\\
\mbox{}\verb@power.free1way.test <- function(n = NULL, @\\
\mbox{}\verb@                                prob = if (is.null(n)) NULL else @\\
\mbox{}\verb@                                                       rep.int(1 / n, n), @\\
\mbox{}\verb@                                alloc_ratio = 1, @\\
\mbox{}\verb@                                blocks = if (is.null(prob)) 1 else NCOL(prob), @\\
\mbox{}\verb@                                strata_ratio = 1, @\\
\mbox{}\verb@                                delta = NULL, mu = 0, @\\
\mbox{}\verb@                                sig.level = .05, power = NULL,@\\
\mbox{}\verb@                                link = c("logit", "probit", "cloglog", "loglog"),@\\
\mbox{}\verb@                                alternative = c("two.sided", "less", "greater"), @\\
\mbox{}\verb@                                nsim = 100, seed = NULL, @\\
\mbox{}\verb@                                tol = .Machine$double.eps^0.25) @\\
\mbox{}\verb@{@\\
\mbox{}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape power args check}\nobreak\ {\footnotesize \NWlink{nuweb98b}{98b}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape random seed}\nobreak\ {\footnotesize \NWlink{nuweb96}{96}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape r2dsim}\nobreak\ {\footnotesize \NWlink{nuweb95}{95}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape power inversion}\nobreak\ {\footnotesize \NWlink{nuweb99}{99}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\\
\mbox{}\verb@    ### n is available now@\\
\mbox{}\verb@    if (is.null(prob)) prob <- rep(1 / n, n)@\\
\mbox{}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape power setup}\nobreak\ {\footnotesize \NWlink{nuweb97a}{97a}}$\,\rangle$}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape estimate Fisher information}\nobreak\ {\footnotesize \NWlink{nuweb97b}{97b}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\\
\mbox{}\verb@    alternative <- match.arg(alternative)@\\
\mbox{}\verb@    if (K == 2L) {@\\
\mbox{}\verb@        se <- 1 / sqrt(c(he))@\\
\mbox{}\verb@        power  <- switch(alternative, @\\
\mbox{}\verb@            "two.sided" = pnorm(qnorm(sig.level / 2) + deltamu / se) + @\\
\mbox{}\verb@                          pnorm(qnorm(sig.level / 2) - deltamu / se),@\\
\mbox{}\verb@            "less" = pnorm(qnorm(sig.level) - deltamu / se),@\\
\mbox{}\verb@            "greater" = pnorm(qnorm(sig.level) + deltamu / se)@\\
\mbox{}\verb@        )@\\
\mbox{}\verb@    } else {@\\
\mbox{}\verb@        if (alternative != "two.sided")@\\
\mbox{}\verb@            stop(gettextf("%s only allows two-sided alternatives in the presence of more than two groups",@\\
\mbox{}\verb@                          "power.free1way.test"),@\\
\mbox{}\verb@                 domain = NA)@\\
\mbox{}\verb@        ncp <- sum((chol(he) %*% deltamu)^2)@\\
\mbox{}\verb@        qsig <- qchisq(sig.level, df = K - 1L, lower.tail = FALSE)@\\
\mbox{}\verb@        power <- pchisq(qsig, df = K - 1L, ncp = ncp, lower.tail = FALSE)@\\
\mbox{}\verb@    }@\\
\mbox{}\verb@@\\
\mbox{}\verb@    @\hbox{$\langle\,${\itshape power htest output}\nobreak\ {\footnotesize \NWlink{nuweb98c}{98c}}$\,\rangle$}\verb@@\\
\mbox{}\verb@@\\
\mbox{}\verb@    ret@\\
\mbox{}\verb@}@\\
\mbox{}\verb@@{\NWsep}
\end{list}
\vspace{-1.5ex}
\footnotesize
\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \NWtxtMacroRefIn\ \NWlink{nuweb25b}{25b}.

\item{}
\end{list}
\end{minipage}\vspace{4ex}
\end{flushleft}
We start with the power of a binomial experiment with $N = 2 \times 25$
observations. In the control group, the odds of winning is 1. Under
treatment, we increase this odds by $50\%$. We compare the results with
\code{power.prop.test}:

<<power.prop.test>>=
delta <- log(1.5)
power.prop.test(n = 25, p1 = .5, p2 = plogis(qlogis(.5) - delta))
power.free1way.test(n = 25, prob = c(.5, .5), delta = delta)
@

Under stratification (twice as many observations in the second stratum) 
and with an ordered outcome at four levels, we might want to compare four
groups, with $25\%$, $50\%$, and $75\%$ increase compared to the odds of the
control:
<<power.odds.test>>=
prb <- matrix(c(.25, .25, .25, .25,
                .10, .20, .30, .40), ncol = 2)
colnames(prb) <- c("s1", "s2")
power.free1way.test(n = 20, prob = prb, 
                    strata_ratio = 2,
                    alloc_ratio = c(1.5, 2, 2), 
                    delta = log(c("low" = 1.25, "med" = 1.5, "high" = 1.75)))
@

We now estimate the power of a Wilcoxon test with, first by simulation from
a logistic distribution, and then by our power function:

<<wilcox>>=

delta <- log(3)
N <- 15
w <- gl(2, N)
pw <- numeric(Nsim)
for (i in seq_along(pw)) {
    y <- rlogis(length(w), location = c(0, delta)[w])
    pw[i] <- wilcox.test(y ~ w)$p.value
}
mean(pw < .05)

power.free1way.test(n = N, delta = delta)

### approximate formula in Hmisc::popower
library("Hmisc")
popower(p = rep(1 / N, N), odds.ratio = exp(delta), n = 2 * N)
@

The power of the Kruskal-Wallis test only needs one additional treatment
effect

<<kruskal>>=
delta <- c("B" = log(2), "C" = log(3))
N <- 15
w <- gl(3, N)
pw <- numeric(Nsim)
for (i in seq_along(pw)) {
    y <- rlogis(length(w), location = c(0, delta)[w])
    pw[i] <- kruskal.test(y ~ w)$p.value
}
mean(pw < .05)

power.free1way.test(n = N, delta = delta)
@

We next use the \code{rfree1way} function to sample from $4 \times 3$ tables with odds ratios $2$ and $3$
and compare the resulting power with result obtained from the approximated
Fisher information. By default, the continuous control distribution is
uniform on the unit interval, thus \code{cut} with breaks defined by the
target control discrete probability distribution generates the outcome.
The plot shows the distribution of the parameter
estimates and the corresponding population values as red dots
(Figure~\ref{fig:POsim}).	

\begin{figure}
<<table, fig = TRUE>>=
prb <- rep.int(1, 4) / 4
pw <- numeric(Nsim)
cf <- matrix(0, nrow = Nsim, ncol = length(delta))
colnames(cf) <- names(delta)
for (i in seq_along(pw)) {
    nd <- rfree1way(n = N, prob = prb, delta = delta)
    ft <- free1way(y ~ groups, data = nd)
    cf[i,] <- coef(ft)
    pw[i] <- summary(ft, test = "Permutation")$p.value
}
mean(pw < .05)
boxplot(cf, las = 1, ylab = expression(hat(delta)))
points(c(1:2), delta, pch = 19, col = "red")
power.free1way.test(n = N, prob = prb, delta = delta)
@
\caption{Power simulation for proportional-odds model and corresponding
power approximation. \label{fig:POsim}}
\end{figure}

In the last example, we sample from $4 \times 3$ tables with odds ratios $2$ and $3$ for three
strata with different control distributions, see Figure~\ref{fig:POstrata}, and again compare the
simulation results to the power function.

\begin{figure}
<<stable, fig = TRUE>>=
prb <- cbind(S1 = rep(1, 4), 
             S2 = c(1, 2, 1, 2), 
             S3 = 1:4)
dimnames(prb) <- list(Ctrl = paste0("i", seq_len(nrow(prb))),
                      Strata = colnames(prb))

pw <- numeric(Nsim)
cf <- matrix(0, nrow = Nsim, ncol = length(delta))
colnames(cf) <- names(delta)
for (i in seq_along(pw)) {
    nd <- rfree1way(n = N, prob = prb, delta = delta)
    ft <- free1way(y ~ groups | blocks, data = nd)
    cf[i,] <- coef(ft)
    pw[i] <- summary(ft, test = "Permutation")$p.value
}
mean(pw < .05)
boxplot(cf, las = 1, ylab = expression(hat(delta)))
points(c(1:2), delta, pch = 19, col = "red")
@
\caption{Power simulation for stratified proportional-odds model and corresponding
power approximation. \label{fig:POstrata}}
\end{figure}

<<powertest>>=
power.free1way.test(n = N, prob = prb, delta = delta, seed = 3)
power.free1way.test(power = .8, prob = prb, delta = delta, seed = 3)
power.free1way.test(n = 19, prob = prb, delta = delta, seed = 3)
@

\chapter{Penalisation} \label{ch:penal}

Sometimes, especially under complete separation, the maximum likelihood
estimator does not exist. We could think of offering the option to add a
penalty term to the log-likelihood, for example half of the log-determinant
of the Hessian (Jeffreys prior) as suggested by \cite{Firth1993} and studied in
\cite{KosmidisFirth2020}. Here is an example

<<Jeffreys>>=
N <- 20
w <- gl(2, N)
y <- rnorm(length(w), mean = c(-2, 3)[w])

x <- free1way(y ~ w, link = "probit")
coef(x)
logLik(x)

pll <- function(cf) {

    start <- x$par
    start[1] <- cf
    x$profile(start, fix = 1)
}

### https://doi.org/10.1111/j.0006-341X.2001.00114.x
### https://doi.org/10.1111/j.1467-9876.2012.01057.x
### https://doi.org/10.1186/s12874-017-0313-9
### https://files.osf.io/v1/resources/fet4d_v3/providers/osfstorage/682fb176db88f967facacb5a?format=pdf&action=download&direct&version=1
### https://doi.org/10.1002/sim.6537
### https://doi.org/10.1007/s11222-023-10217-3
### https://arxiv.org/abs/2510.06465
fun <- function(cf) {
    ret <- pll(cf)
    ret$value - .5 * determinant(ret$hessian, logarithm = TRUE)$modulus
}

ci <- confint(x, level = .99, test = "Wald")
grd <- seq(from = ci[1], to = ci[2], length.out = 50)

optim(coef(x), fn = fun, method = "Brent", 
      lower = min(grd), upper = max(grd))[c("par", "value")]
@

The \code{MPL_Jeffreys} argument can be used to request this type of
penalisation from \code{free1way} (this argument should be added to
\code{free1way.table} and documented)
<<MPL_Jeffreys>>=
free1way(y ~ w, link = "probit", MPL_Jeffreys = TRUE)
@

\chapter{Acknowledgements}

We would like to thank Frank Harrell, Michael Fay, Bryan Shepherd, and Ioannis Kosmidis
for insights and valuable discussions.

\chapter*{Index}

\section*{Files}


{\small\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item \verb@"free1way.R"@ {\footnotesize {\NWtxtDefBy} \NWlink{nuweb25b}{25b}.}
\item \verb@"linkfun.R"@ {\footnotesize {\NWtxtDefBy} \NWlink{nuweb16}{16}.}
\item \verb@"utils.R"@ {\footnotesize {\NWtxtDefBy} \NWlink{nuweb25a}{25a}.}
\end{list}}

\section*{Fragments}


{\small\begin{list}{}{\setlength{\itemsep}{-\parsep}\setlength{\itemindent}{-\leftmargin}}
\item $\langle\,$add legend\nobreak\ {\footnotesize \NWlink{nuweb83c}{83c}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb83d}{83d}.}
\item $\langle\,$cloglog\nobreak\ {\footnotesize \NWlink{nuweb20}{20}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb16}{16}.}
\item $\langle\,$confint lower\nobreak\ {\footnotesize \NWlink{nuweb57}{57}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb59}{59}.}
\item $\langle\,$confint upper\nobreak\ {\footnotesize \NWlink{nuweb58}{58}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb59}{59}.}
\item $\langle\,$density prob ratio\nobreak\ {\footnotesize \NWlink{nuweb4a}{4a}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb4b}{4b}\NWlink{nuweb4c}{c}.
}
\item $\langle\,$design args\nobreak\ {\footnotesize \NWlink{nuweb91}{91}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb92}{92}\NWlink{nuweb97a}{, 97a}.
}
\item $\langle\,$determine steps in blocks\nobreak\ {\footnotesize \NWlink{nuweb9}{9}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb10a}{10a}.}
\item $\langle\,$diagonal elements for Hessian of intercepts\nobreak\ {\footnotesize \NWlink{nuweb5c}{5c}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb7}{7}.}
\item $\langle\,$do optim\nobreak\ {\footnotesize \NWlink{nuweb27}{27}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb29}{29}\NWlink{nuweb30b}{, 30b}.
}
\item $\langle\,$estimate Fisher information\nobreak\ {\footnotesize \NWlink{nuweb97b}{97b}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb100}{100}.}
\item $\langle\,$exact proportional odds\nobreak\ {\footnotesize \NWlink{nuweb42}{42}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb47}{47}.}
\item $\langle\,$extract plot data\nobreak\ {\footnotesize \NWlink{nuweb81}{81}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb83d}{83d}.}
\item $\langle\,$formula business\nobreak\ {\footnotesize \NWlink{nuweb48}{48}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb49}{49}.}
\item $\langle\,$free1way confint\nobreak\ {\footnotesize \NWlink{nuweb59}{59}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb25b}{25b}.}
\item $\langle\,$free1way factor\nobreak\ {\footnotesize \NWlink{nuweb52}{52}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb25b}{25b}.}
\item $\langle\,$free1way formula\nobreak\ {\footnotesize \NWlink{nuweb49}{49}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb25b}{25b}.}
\item $\langle\,$free1way generic and table method (main workhorse)\nobreak\ {\footnotesize \NWlink{nuweb46}{46}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb25b}{25b}.}
\item $\langle\,$free1way methods\nobreak\ {\footnotesize \NWlink{nuweb53}{53}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb25b}{25b}.}
\item $\langle\,$free1way numeric\nobreak\ {\footnotesize \NWlink{nuweb51}{51}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb25b}{25b}.}
\item $\langle\,$free1way permutation tests\nobreak\ {\footnotesize \NWlink{nuweb47}{47}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb46}{46}.}
\item $\langle\,$free1way print\nobreak\ {\footnotesize \NWlink{nuweb54}{54}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb25b}{25b}.}
\item $\langle\,$free1way summary\nobreak\ {\footnotesize \NWlink{nuweb55}{55}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb25b}{25b}.}
\item $\langle\,$full Hessian\nobreak\ {\footnotesize \NWlink{nuweb13}{13}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb14}{14}.}
\item $\langle\,$Hessian\nobreak\ {\footnotesize \NWlink{nuweb7}{7}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb33}{33}.}
\item $\langle\,$Hessian prep\nobreak\ {\footnotesize \NWlink{nuweb5a}{5a}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb7}{7}.}
\item $\langle\,$intercept / shift contributions to Hessian\nobreak\ {\footnotesize \NWlink{nuweb6}{6}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb7}{7}.}
\item $\langle\,$Jeffreys penalisation\nobreak\ {\footnotesize \NWlink{nuweb30a}{30a}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb27}{27}.}
\item $\langle\,$link2fun\nobreak\ {\footnotesize \NWlink{nuweb45}{45}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb46}{46}\NWlink{nuweb92}{, 92}\NWlink{nuweb95}{, 95}\NWlink{nuweb97a}{, 97a}.
}
\item $\langle\,$linkfun\nobreak\ {\footnotesize \NWlink{nuweb17}{17}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb16}{16}.}
\item $\langle\,$logit\nobreak\ {\footnotesize \NWlink{nuweb18}{18}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb16}{16}.}
\item $\langle\,$logLik, gradient, Hessian\nobreak\ {\footnotesize \NWlink{nuweb28}{28}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb29}{29}.}
\item $\langle\,$loglog\nobreak\ {\footnotesize \NWlink{nuweb19}{19}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb16}{16}.}
\item $\langle\,$LRT\nobreak\ {\footnotesize \NWlink{nuweb37a}{37a}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb36a}{36a}.}
\item $\langle\,$marginal fit\nobreak\ {\footnotesize \NWlink{nuweb82b}{82b}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb83d}{83d}.}
\item $\langle\,$marginal plot\nobreak\ {\footnotesize \NWlink{nuweb83a}{83a}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb83d}{83d}.}
\item $\langle\,$ML estimation\nobreak\ {\footnotesize \NWlink{nuweb33}{33}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb25b}{25b}.}
\item $\langle\,$model plot\nobreak\ {\footnotesize \NWlink{nuweb83b}{83b}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb83d}{83d}.}
\item $\langle\,$negative logLik\nobreak\ {\footnotesize \NWlink{nuweb3b}{3b}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb33}{33}.}
\item $\langle\,$negative score\nobreak\ {\footnotesize \NWlink{nuweb4b}{4b}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb33}{33}.}
\item $\langle\,$negative score residuals\nobreak\ {\footnotesize \NWlink{nuweb4c}{4c}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb33}{33}.}
\item $\langle\,$Newton\nobreak\ {\footnotesize ?}$\,\rangle$ {\footnotesize {\NWtxtNoRef}.}
\item $\langle\,$Newton convergence\nobreak\ {\footnotesize \NWlink{nuweb23b}{23b}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb24}{24}.}
\item $\langle\,$Newton step halving\nobreak\ {\footnotesize \NWlink{nuweb23a}{23a}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb24}{24}.}
\item $\langle\,$Newton update\nobreak\ {\footnotesize \NWlink{nuweb22}{22}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb24}{24}.}
\item $\langle\,$NewtonRaphson\nobreak\ {\footnotesize \NWlink{nuweb24}{24}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb25b}{25b}.}
\item $\langle\,$off-diagonal elements for Hessian of intercepts\nobreak\ {\footnotesize \NWlink{nuweb5b}{5b}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb7}{7}.}
\item $\langle\,$optim\nobreak\ {\footnotesize \NWlink{nuweb30b}{30b}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb33}{33}.}
\item $\langle\,$parm to prob\nobreak\ {\footnotesize \NWlink{nuweb3a}{3a}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb3b}{3b}\NWlink{nuweb4b}{, 4b}\NWlink{nuweb4c}{c}\NWlink{nuweb7}{, 7}.
}
\item $\langle\,$permutation confint\nobreak\ {\footnotesize \NWlink{nuweb56}{56}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb59}{59}.}
\item $\langle\,$Permutation p-values\nobreak\ {\footnotesize \NWlink{nuweb39}{39}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb54}{54}.}
\item $\langle\,$Permutation statistics\nobreak\ {\footnotesize \NWlink{nuweb38}{38}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb36a}{36a}.}
\item $\langle\,$plot free1way\nobreak\ {\footnotesize \NWlink{nuweb83d}{83d}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb25b}{25b}.}
\item $\langle\,$post processing\nobreak\ {\footnotesize \NWlink{nuweb31}{31}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb33}{33}.}
\item $\langle\,$power\nobreak\ {\footnotesize \NWlink{nuweb100}{100}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb25b}{25b}.}
\item $\langle\,$power args check\nobreak\ {\footnotesize \NWlink{nuweb98b}{98b}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb100}{100}.}
\item $\langle\,$power call\nobreak\ {\footnotesize \NWlink{nuweb98a}{98a}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb99}{99}.}
\item $\langle\,$power htest output\nobreak\ {\footnotesize \NWlink{nuweb98c}{98c}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb100}{100}.}
\item $\langle\,$power inversion\nobreak\ {\footnotesize \NWlink{nuweb99}{99}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb100}{100}.}
\item $\langle\,$power setup\nobreak\ {\footnotesize \NWlink{nuweb97a}{97a}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb100}{100}.}
\item $\langle\,$ppplot\nobreak\ {\footnotesize \NWlink{nuweb88}{88}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb25b}{25b}.}
\item $\langle\,$probit\nobreak\ {\footnotesize \NWlink{nuweb21}{21}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb16}{16}.}
\item $\langle\,$profile\nobreak\ {\footnotesize \NWlink{nuweb29}{29}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb33}{33}.}
\item $\langle\,$r2dsim\nobreak\ {\footnotesize \NWlink{nuweb95}{95}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb100}{100}.}
\item $\langle\,$random seed\nobreak\ {\footnotesize \NWlink{nuweb96}{96}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb100}{100}.}
\item $\langle\,$Rao\nobreak\ {\footnotesize \NWlink{nuweb37b}{37b}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb36a}{36a}.}
\item $\langle\,$refit block intercepts\nobreak\ {\footnotesize \NWlink{nuweb82a}{82a}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb83d}{83d}.}
\item $\langle\,$resampling\nobreak\ {\footnotesize \NWlink{nuweb41}{41}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb47}{47}.}
\item $\langle\,$rfree1way\nobreak\ {\footnotesize \NWlink{nuweb92}{92}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb25b}{25b}.}
\item $\langle\,$ROC bands\nobreak\ {\footnotesize \NWlink{nuweb87}{87}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb88}{88}.}
\item $\langle\,$setup and starting values\nobreak\ {\footnotesize \NWlink{nuweb26}{26}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb33}{33}.}
\item $\langle\,$setup canvas\nobreak\ {\footnotesize \NWlink{nuweb82c}{82c}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb83d}{83d}.}
\item $\langle\,$statistics\nobreak\ {\footnotesize \NWlink{nuweb36a}{36a}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb54}{54}\NWlink{nuweb59}{, 59}.
}
\item $\langle\,$Strasser Weber\nobreak\ {\footnotesize \NWlink{nuweb40}{40}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb47}{47}.}
\item $\langle\,$stratified Hessian\nobreak\ {\footnotesize \NWlink{nuweb14}{14}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb33}{33}.}
\item $\langle\,$stratified negative logLik\nobreak\ {\footnotesize \NWlink{nuweb11a}{11a}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb33}{33}.}
\item $\langle\,$stratified negative score\nobreak\ {\footnotesize \NWlink{nuweb11b}{11b}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb33}{33}.}
\item $\langle\,$stratified negative score residual\nobreak\ {\footnotesize \NWlink{nuweb11c}{11c}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb33}{33}.}
\item $\langle\,$stratum prep\nobreak\ {\footnotesize \NWlink{nuweb10b}{10b}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb11a}{11a}\NWlink{nuweb11b}{b}\NWlink{nuweb11c}{c}\NWlink{nuweb14}{, 14}\NWlink{nuweb31}{, 31}.
}
\item $\langle\,$table2list body\nobreak\ {\footnotesize \NWlink{nuweb10a}{10a}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb26}{26}.}
\item $\langle\,$variable names and checks\nobreak\ {\footnotesize \NWlink{nuweb50}{50}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb51}{51}\NWlink{nuweb52}{, 52}.
}
\item $\langle\,$Wald statistic\nobreak\ {\footnotesize \NWlink{nuweb36b}{36b}}$\,\rangle$ {\footnotesize {\NWtxtRefIn} \NWlink{nuweb36a}{36a}.}
\end{list}}

\section*{Identifiers}


\bibliographystyle{plainnat}
\bibliography{\Sexpr{gsub("\\.bib", "", system.file("REFERENCES.bib", package = "free1way.docreg"))}}

\end{document}