Quantitative patterns in constraint interaction: TAGALOG

Introduction

This is an R Markdown document. Markdown is a simple formatting syntax for authoring web pages.

If you are viewing this in a web browser, then an .Rmd file has been “knit” into a web page that includes the results of running embedded chunks of R code.

If you are viewing this in RStudio, clicking the Knit HTML button will generate a web page that includes both the content and the output of any embedded R code chunks in the document. You may need to use the Session menu to set the working directory to the source file location.

This document accompanies one section of a manuscript by ANONYMOUS.

Preliminaries

Set up graphical parameters and other preliminaries for the whole script.

set.seed(123) #in case of any randomization

#load packages
require(grid) || install.packages("grid") 
## Loading required package: grid
## [1] TRUE
require(grid)
require(vcd) || install.packages("vcd") 
## Loading required package: vcd
## [1] TRUE
require(vcd)
require(lme4) || install.packages("lme4") 
## Loading required package: lme4
## Loading required package: Matrix
## Loading required package: Rcpp
## [1] TRUE
require(lme4)
require(car) || install.packages("car") 
## Loading required package: car
## [1] TRUE
require(car)
require(multcomp) || install.packages("multcomp") 
## Loading required package: multcomp
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: splines
## Loading required package: TH.data
## [1] TRUE
require(multcomp)
require(stringr) || install.packages("stringr") 
## Loading required package: stringr
## [1] TRUE
require(stringr)
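
The require(x) || install.packages(x) idiom relies on the second require() call to load any package that had to be installed, and in the run shown above every package was already present. A helper along these lines (a sketch, not part of the original script; loadOrInstall is a name introduced here) does the same install-and-load step in one place:

#Sketch of an install-and-load helper (not used in the original script)
loadOrInstall <- function(pkgs) {
  for (pkg in pkgs) {
    if (!requireNamespace(pkg, quietly=TRUE)) {
      install.packages(pkg)
    }
    library(pkg, character.only=TRUE)
  }
}
#e.g. loadOrInstall(c("grid","vcd","lme4","car","multcomp","stringr"))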

#set up plot parameters
myFontFamily="serif" 

par(font=list(family=myFontFamily)) #for most plots
#Unfortunately, this doesn't carry over when a .png file is written, so it is necessary to include
#family=myFontFamily
#as one of the arguments to the png() function

#Also unfortunately, this doesn't carry over to mosaic plots, which use gpar (and gpar can't be set like this for the whole script). Instead, we create this object:
gpSerif <- gpar(fontfamily=myFontFamily)
#and then each call to mosaic() has to include the line 
#gp_varnames=gpSerif,gp_labels=gpSerif,
#Note that for adding the numbers to the tiles in the mosaic plots with mtext(), we just use
#, fontfamily=myFontFamily
#in the list of gpar() arguments

myResMultiplier <- 5 #default is 72 ppi; using this in every call to png() will make it 360

Pipeline to the input file for this script

  1. Enter words with nasal-substituting morphology from Leo English’s 1987 Tagalog-English Dictionary into a FileMaker file.

  2. Extract counts of substituted, unsubstituted, and varying for each prefix type and each consonant.

Place these counts into a data frame:

#The raw data
consonant <- c("p","p","p","p","p","p","t/s","t/s","t/s","t/s","t/s","t/s","k","k","k","k","k","k","b","b","b","b","b","b","d","d","d","d","d","d","g","g","g","g","g","g")
morph <- c("maN- other", "paN-RED-", "maN-RED-", "maN- adv.", "paN- noun", "paN- res.","maN- other", "paN-RED-", "maN-RED-", "maN- adv.", "paN- noun", "paN- res.","maN- other", "paN-RED-", "maN-RED-", "maN- adv.", "paN- noun", "paN- res.","maN- other", "paN-RED-", "maN-RED-", "maN- adv.", "paN- noun", "paN- res.","maN- other", "paN-RED-", "maN-RED-", "maN- adv.", "paN- noun", "paN- res.","maN- other", "paN-RED-", "maN-RED-", "maN- adv.", "paN- noun", "paN- res.")
morph_unicode <- c("ma\U014B- other", "pa\U014B-\U0280\U1D07\U1D05-", "ma\U014B-\U0280\U1D07\U1D05-", "ma\U014B- adv", "pa\U014B- noun", "pa\U014B- res","ma\U014B- other", "pa\U014B-\U0280\U1D07\U1D05-", "ma\U014B-\U0280\U1D07\U1D05-", "ma\U014B- adv", "pa\U014B- noun", "pa\U014B- res","ma\U014B- other", "pa\U014B-\U0280\U1D07\U1D05-", "ma\U014B-\U0280\U1D07\U1D05-", "ma\U014B- adv", "pa\U014B- noun", "pa\U014B- res","ma\U014B- other", "pa\U014B-\U0280\U1D07\U1D05-", "ma\U014B-\U0280\U1D07\U1D05-", "ma\U014B- adv", "pa\U014B- noun", "pa\U014B- res","ma\U014B- other", "pa\U014B-\U0280\U1D07\U1D05-", "ma\U014B-\U0280\U1D07\U1D05-", "ma\U014B- adv", "pa\U014B- noun", "pa\U014B- res","ma\U014B- other", "pa\U014B-\U0280\U1D07\U1D05-", "ma\U014B-\U0280\U1D07\U1D05-", "ma\U014B- adv", "pa\U014B- noun", "pa\U014B- res")
subst_count <- c(65,39,18,11,30.5,4.5,71,63,44,51,60.5,5,74.5,25,20,11,9.5,2,66,29.5,16.5,6,15,1.5,7,3,1,0,1,0,0,1,0,0,0,0)
unsubst_count <-c(0,0,0,0,6.5,4.5,0,0,1,0,29.5,13,0.5,1,2,0,9.5,5,6,5.5,21.5,6,30,17.5,3,7,12,12,17,7,13,17,12,12,27,4)

#Combine into data frame
#(stringsAsFactors=TRUE keeps the character columns as factors, so the releveling below
#also works in R >= 4.0, where data.frame() no longer converts strings to factors by default)
nas_sub <- data.frame(consonant, morph, morph_unicode, subst_count, unsubst_count,
  stringsAsFactors=TRUE)
#reorder levels of consonant and morph
nas_sub$consonant <- factor(nas_sub$consonant,levels(nas_sub$consonant)[c(5,6,4,1,2,3)])
nas_sub$morph <- factor(nas_sub$morph,levels(nas_sub$morph)[c(2,6,1,3,4,5)])
nas_sub$morph_unicode <- factor(nas_sub$morph_unicode,levels(nas_sub$morph_unicode)[c(2,6,1,3,4,5)])

#Add column for substitution rate
nas_sub$subst_rate <- nas_sub$subst_count / (nas_sub$subst_count + nas_sub$unsubst_count)

#Here's what the data frame looks like (not including the Unicode column):
nas_sub[,c(1,2,4,5,6)]
##    consonant      morph subst_count unsubst_count subst_rate
## 1          p maN- other        65.0           0.0    1.00000
## 2          p   paN-RED-        39.0           0.0    1.00000
## 3          p   maN-RED-        18.0           0.0    1.00000
## 4          p  maN- adv.        11.0           0.0    1.00000
## 5          p  paN- noun        30.5           6.5    0.82432
## 6          p  paN- res.         4.5           4.5    0.50000
## 7        t/s maN- other        71.0           0.0    1.00000
## 8        t/s   paN-RED-        63.0           0.0    1.00000
## 9        t/s   maN-RED-        44.0           1.0    0.97778
## 10       t/s  maN- adv.        51.0           0.0    1.00000
## 11       t/s  paN- noun        60.5          29.5    0.67222
## 12       t/s  paN- res.         5.0          13.0    0.27778
## 13         k maN- other        74.5           0.5    0.99333
## 14         k   paN-RED-        25.0           1.0    0.96154
## 15         k   maN-RED-        20.0           2.0    0.90909
## 16         k  maN- adv.        11.0           0.0    1.00000
## 17         k  paN- noun         9.5           9.5    0.50000
## 18         k  paN- res.         2.0           5.0    0.28571
## 19         b maN- other        66.0           6.0    0.91667
## 20         b   paN-RED-        29.5           5.5    0.84286
## 21         b   maN-RED-        16.5          21.5    0.43421
## 22         b  maN- adv.         6.0           6.0    0.50000
## 23         b  paN- noun        15.0          30.0    0.33333
## 24         b  paN- res.         1.5          17.5    0.07895
## 25         d maN- other         7.0           3.0    0.70000
## 26         d   paN-RED-         3.0           7.0    0.30000
## 27         d   maN-RED-         1.0          12.0    0.07692
## 28         d  maN- adv.         0.0          12.0    0.00000
## 29         d  paN- noun         1.0          17.0    0.05556
## 30         d  paN- res.         0.0           7.0    0.00000
## 31         g maN- other         0.0          13.0    0.00000
## 32         g   paN-RED-         1.0          17.0    0.05556
## 33         g   maN-RED-         0.0          12.0    0.00000
## 34         g  maN- adv.         0.0          12.0    0.00000
## 35         g  paN- noun         0.0          27.0    0.00000
## 36         g  paN- res.         0.0           4.0    0.00000
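
As a quick sanity check (a sketch, not part of the original script; by_C is a name introduced here), the counts can also be pooled over the six constructions to give an overall substitution rate per consonant:

#Sketch: overall counts and substitution rate per consonant, pooled over the six constructions
by_C <- aggregate(cbind(subst_count, unsubst_count) ~ consonant, data=nas_sub, FUN=sum)
by_C$subst_rate <- by_C$subst_count / (by_C$subst_count + by_C$unsubst_count)
by_C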

Mosaic plots by consonant and by morphology

All morphology lumped together, with the data separated by consonant. Note that this includes constructions other than the six in nas_sub, so the total counts are higher:

#make an object with all the data in it
all_Cs<-structure(c(10,40,17,100,70,97,  253,430,185,177,25,1),
 .Dim=as.integer(c(6,2)),
 .Dimnames=structure(list(Stem.initial.obstruent=
 c("p", "t/s", "k", "b", "d", "g"),
 Subst.behavior=c("unsubst","subst")),
 .Names=c("Stem-initial consonant",
 "Behavior according to dictionary")),
 class="table")
all_Cs
##                       Behavior according to dictionary
## Stem-initial consonant unsubst subst
##                    p        10   253
##                    t/s      40   430
##                    k        17   185
##                    b       100   177
##                    d        70    25
##                    g        97     1
#Here's the mosaic plot with no numbers on the tiles. A nicer version is being written to a PNG file next:

mosaic(all_Cs,direction="v",pop=FALSE,
 gp_varnames=gpSerif,gp_labels=gpSerif,
 gp=gpar(fill=c("white","black"))
 #gp=gpar(fill=c("white","black"), fontfamily="HersheySerif")
 ,labeling_args=list(
rot_labels=c(left=0)
#pos_varnames="left",pos_labels="right",just_labels="center",just_varnames="center"
,offset_varnames=c(left=1.5),offset_labels=c(left=0.5)
# ,abbreviate=c('Behavior according to dictionary'=5)
))

[Figure: mosaic plot of substitution behavior by stem-initial consonant (chunk mosaicPlotComparingCs)]

#set up PNG file
png(file="Tagalog_mosaic_plot_by_consonant.png",width=myResMultiplier*600,height=myResMultiplier*350, res=myResMultiplier*72)
par(mar=c(5,5,2,1)+0.1)

#make a basic mosaic plot, but with no numbers in the tiles
mosaic(all_Cs,direction="v",pop=FALSE,
 gp_varnames=gpSerif,gp_labels=gpSerif,
 gp=gpar(fill=c("white","black"))
 ,labeling_args=list(
rot_labels=c(left=0)
#pos_varnames="left",pos_labels="right",just_labels="center",just_varnames="center"
,offset_varnames=c(left=1.5),offset_labels=c(left=0.5)
# ,abbreviate=c('Behavior according to dictionary'=5)
))

#add numbers to tiles if >0
##numbers for unsubstituted
seekViewport("cell:Stem-initial consonant=p,Behavior according to dictionary=unsubst")
if(all_Cs[1,1]>0) {grid.text(all_Cs[1,1],gp=gpar(col="black",fontfamily=myFontFamily))}
seekViewport("cell:Stem-initial consonant=t/s,Behavior according to dictionary=unsubst")
if(all_Cs[2,1]>0) {grid.text(all_Cs[2,1],gp=gpar(col="black",fontfamily=myFontFamily))}
seekViewport("cell:Stem-initial consonant=k,Behavior according to dictionary=unsubst")
if(all_Cs[3,1]>0) {grid.text(all_Cs[3,1],gp=gpar(col="black",fontfamily=myFontFamily))}
seekViewport("cell:Stem-initial consonant=b,Behavior according to dictionary=unsubst")
if(all_Cs[4,1]>0) {grid.text(all_Cs[4,1],gp=gpar(col="black",fontfamily=myFontFamily))}
seekViewport("cell:Stem-initial consonant=d,Behavior according to dictionary=unsubst")
if(all_Cs[5,1]>0) {grid.text(all_Cs[5,1],gp=gpar(col="black",fontfamily=myFontFamily))}
seekViewport("cell:Stem-initial consonant=g,Behavior according to dictionary=unsubst")
if(all_Cs[6,1]>0) {grid.text(all_Cs[6,1],gp=gpar(col="black",fontfamily=myFontFamily))}

##numbers for substituted
seekViewport("cell:Stem-initial consonant=p,Behavior according to dictionary=subst")
if(all_Cs[1,2]>0) {grid.text(all_Cs[1,2],gp=gpar(col="white",fontfamily=myFontFamily))}
seekViewport("cell:Stem-initial consonant=t/s,Behavior according to dictionary=subst")
if(all_Cs[2,2]>0) {grid.text(all_Cs[2,2],gp=gpar(col="white",fontfamily=myFontFamily))}
seekViewport("cell:Stem-initial consonant=k,Behavior according to dictionary=subst")
if(all_Cs[3,2]>0) {grid.text(all_Cs[3,2],gp=gpar(col="white",fontfamily=myFontFamily))}
seekViewport("cell:Stem-initial consonant=b,Behavior according to dictionary=subst")
if(all_Cs[4,2]>0) {grid.text(all_Cs[4,2],gp=gpar(col="white",fontfamily=myFontFamily))}
seekViewport("cell:Stem-initial consonant=d,Behavior according to dictionary=subst")
if(all_Cs[5,2]>0) {grid.text(all_Cs[5,2],gp=gpar(col="white",fontfamily=myFontFamily))}
#Suppressing the number 1 for g, because it doesn't quite fit in the tile. In the paper, there's a textbox next to the figure with the number 1 instead.
#seekViewport("cell:Stem-initial consonant=g,Behavior according to dictionary=subst")
#if(all_Cs[6,2]>0) {grid.text(all_Cs[6,2],gp=gpar(col="white"))}

dev.off()
## pdf 
##   2
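
Labelling the tiles requires one seekViewport()/grid.text() pair per cell, and the same pattern recurs in every plot below. A helper along these lines (a sketch, not part of the original script; addTileNumbers is a name introduced here) could do the looping instead, called right after mosaic(..., pop=FALSE) while the plot's device is still open:

#Sketch of a helper that writes each nonzero count onto its mosaic tile
addTileNumbers <- function(tab, var1, var2, textCols) {
  for (i in seq_len(nrow(tab))) {
    for (j in seq_len(ncol(tab))) {
      if (tab[i,j]>0) {
        seekViewport(paste0("cell:",var1,"=",rownames(tab)[i],",",var2,"=",colnames(tab)[j]))
        grid.text(tab[i,j], gp=gpar(col=textCols[j], fontfamily=myFontFamily))
      }
    }
  }
}
#e.g. addTileNumbers(all_Cs, "Stem-initial consonant",
#  "Behavior according to dictionary", c("black","white"))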

All consonants lumped together, with the data separated by morphology. It would be more elegant to derive these counts from the tag table (read in below), but we already had code with the numbers hard-coded:

#set up the data
all_prefixes<-structure(c(26,37,53,48,116,51, 1,1,3,0,51,10, 317,177,105,83,97,8 ),
 .Dim=as.integer(c(6,3)),
 .Dimnames=structure(list(Stem.initial.obstruent=
 c("maN- other", "paN-RED-", "maN-RED-", "maN- adv", "paN- noun", "paN- res"), #unfortunately, mosaic() can't handle Unicode in dimension names
 Subst.behavior=c("unsubst","vary","subst")),
 .Names=c("Prefixing construction",
 "Behavior according to dictionary")),
 class="table")
all_prefixes
##                       Behavior according to dictionary
## Prefixing construction unsubst vary subst
##             maN- other      26    1   317
##             paN-RED-        37    1   177
##             maN-RED-        53    3   105
##             maN- adv        48    0    83
##             paN- noun      116   51    97
##             paN- res        51   10     8
#a quick version of the plot, with no numbers written on the tiles
par(mar=c(5,5,2,1)+0.1)
mosaic(all_prefixes,direction="v",pop=FALSE,
 gp_varnames=gpSerif,gp_labels=gpSerif,
 gp=gpar(fill=c("white","grey","black"))
 ,labeling_args=list(
rot_labels=c(left=0,top=60)
,offset_varnames=c(left=1.5,top=2.5),offset_labels=c(left=0.5,top=1)
)
,margins=c(top=5)
)

[Figure: mosaic plot of substitution behavior by prefixing construction (chunk mosaicPlotComparingMorph)]

#a nicer version, printed to png
png(file="Tagalog_mosaic_plot_by_morphology.png",width=myResMultiplier*600,height=myResMultiplier*350, res=myResMultiplier*72)
par(mar=c(5,5,2,1)+0.1) 

mosaic(all_prefixes,direction="v",pop=FALSE,
 gp_varnames=gpSerif,gp_labels=gpSerif,
 gp=gpar(fill=c("white","grey","black"))
 ,labeling_args=list(
   rot_labels=c(left=0,top=60)
   ,offset_varnames=c(left=1.5,top=2.5),offset_labels=c(left=0.5,top=1)
)
,margins=c(top=5))

seekViewport("cell:Prefixing construction=maN- other,Behavior according to dictionary=unsubst")
if(all_prefixes[1,1]>0) {grid.text(all_prefixes[1,1],gp=gpar(col="black",fontfamily=myFontFamily))}
seekViewport("cell:Prefixing construction=paN-RED-,Behavior according to dictionary=unsubst")
if(all_prefixes[2,1]>0) {grid.text(all_prefixes[2,1],gp=gpar(col="black",fontfamily=myFontFamily))}
seekViewport("cell:Prefixing construction=maN-RED-,Behavior according to dictionary=unsubst")
if(all_prefixes[3,1]>0) {grid.text(all_prefixes[3,1],gp=gpar(col="black",fontfamily=myFontFamily))}
seekViewport("cell:Prefixing construction=maN- adv,Behavior according to dictionary=unsubst")
if(all_prefixes[4,1]>0) {grid.text(all_prefixes[4,1],gp=gpar(col="black",fontfamily=myFontFamily))}
seekViewport("cell:Prefixing construction=paN- noun,Behavior according to dictionary=unsubst")
if(all_prefixes[5,1]>0) {grid.text(all_prefixes[5,1],gp=gpar(col="black",fontfamily=myFontFamily))}
seekViewport("cell:Prefixing construction=paN- res,Behavior according to dictionary=unsubst")
if(all_prefixes[6,1]>0) {grid.text(all_prefixes[6,1],gp=gpar(col="black",fontfamily=myFontFamily))}

seekViewport("cell:Prefixing construction=maN- other,Behavior according to dictionary=vary")
if(all_prefixes[1,2]>0) {grid.text(all_prefixes[1,2],gp=gpar(col="black",fontfamily=myFontFamily))}
seekViewport("cell:Prefixing construction=paN-RED-,Behavior according to dictionary=vary")
if(all_prefixes[2,2]>0) {grid.text(all_prefixes[2,2],gp=gpar(col="black",fontfamily=myFontFamily))}
seekViewport("cell:Prefixing construction=maN-RED-,Behavior according to dictionary=vary")
if(all_prefixes[3,2]>0) {grid.text(all_prefixes[3,2],gp=gpar(col="black",fontfamily=myFontFamily))}
seekViewport("cell:Prefixing construction=maN- adv,Behavior according to dictionary=vary")
if(all_prefixes[4,2]>0) {grid.text(all_prefixes[4,2],gp=gpar(col="black",fontfamily=myFontFamily))}
seekViewport("cell:Prefixing construction=paN- noun,Behavior according to dictionary=vary")
if(all_prefixes[5,2]>0) {grid.text(all_prefixes[5,2],gp=gpar(col="black",fontfamily=myFontFamily))}
seekViewport("cell:Prefixing construction=paN- res,Behavior according to dictionary=vary")
if(all_prefixes[6,2]>0) {grid.text(all_prefixes[6,2],gp=gpar(col="black",fontfamily=myFontFamily))}

seekViewport("cell:Prefixing construction=maN- other,Behavior according to dictionary=subst")
if(all_prefixes[1,3]>0) {grid.text(all_prefixes[1,3],gp=gpar(col="white",fontfamily=myFontFamily))}
seekViewport("cell:Prefixing construction=paN-RED-,Behavior according to dictionary=subst")
if(all_prefixes[2,3]>0) {grid.text(all_prefixes[2,3],gp=gpar(col="white",fontfamily=myFontFamily))}
seekViewport("cell:Prefixing construction=maN-RED-,Behavior according to dictionary=subst")
if(all_prefixes[3,3]>0) {grid.text(all_prefixes[3,3],gp=gpar(col="white",fontfamily=myFontFamily))}
seekViewport("cell:Prefixing construction=maN- adv,Behavior according to dictionary=subst")
if(all_prefixes[4,3]>0) {grid.text(all_prefixes[4,3],gp=gpar(col="white",fontfamily=myFontFamily))}
seekViewport("cell:Prefixing construction=paN- noun,Behavior according to dictionary=subst")
if(all_prefixes[5,3]>0) {grid.text(all_prefixes[5,3],gp=gpar(col="white",fontfamily=myFontFamily))}
seekViewport("cell:Prefixing construction=paN- res,Behavior according to dictionary=subst")
if(all_prefixes[6,3]>0) {grid.text(all_prefixes[6,3],gp=gpar(col="white",fontfamily=myFontFamily))}

#windows.options(width=5.6, height=4.48)
dev.off()
## pdf 
##   2

Separate plots for each affix, comparing consonants (these do not appear in the paper). A loop that draws all six at once is sketched after the last of these blocks:

paN_RED<-structure(c(0,0,0,1,5,7,17,7,0,0,0,0,1,0,0,0,39,22,41,25,29,3,1,17),
 .Dim=as.integer(c(8,3)),
 .Dimnames=structure(list(Stem.initial.obstruent=
 c("p","t","s","k","b","d","g","glottal"),
 Subst.behavior=c("unsubst","vary","subst")),
 .Names=c("Stem-initial obstruent",
 "Behavior according to dictionary")),
 class="table")
paN_RED
##                       Behavior according to dictionary
## Stem-initial obstruent unsubst vary subst
##                p             0    0    39
##                t             0    0    22
##                s             0    0    41
##                k             1    0    25
##                b             5    1    29
##                d             7    0     3
##                g            17    0     1
##                glottal       7    0    17
mosaic(paN_RED,direction="v",pop=FALSE,
 gp=gpar(fill=c("white","grey","black"))
 ,labeling_args=list(
rot_labels=c(left=0)
#pos_varnames="left",pos_labels="right",just_labels="center",just_varnames="center"
,offset_varnames=c(left=1.5),offset_labels=c(left=0.5)
# ,abbreviate=c('Behavior according to dictionary'=5)
))

seekViewport("cell:Stem-initial obstruent=p,Behavior according to dictionary=unsubst")
if(paN_RED[1,1]>0) {grid.text(paN_RED[1,1],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=t,Behavior according to dictionary=unsubst")
if(paN_RED[2,1]>0) {grid.text(paN_RED[2,1],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=s,Behavior according to dictionary=unsubst")
if(paN_RED[3,1]>0) {grid.text(paN_RED[3,1],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=k,Behavior according to dictionary=unsubst")
if(paN_RED[4,1]>0) {grid.text(paN_RED[4,1],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=b,Behavior according to dictionary=unsubst")
if(paN_RED[5,1]>0) {grid.text(paN_RED[5,1],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=d,Behavior according to dictionary=unsubst")
if(paN_RED[6,1]>0) {grid.text(paN_RED[6,1],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=g,Behavior according to dictionary=unsubst")
if(paN_RED[7,1]>0) {grid.text(paN_RED[7,1],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=glottal,Behavior according to dictionary=unsubst")
if(paN_RED[8,1]>0) {grid.text(paN_RED[8,1],gp=gpar(col="black"))}

seekViewport("cell:Stem-initial obstruent=p,Behavior according to dictionary=vary")
if(paN_RED[1,2]>0) {grid.text(paN_RED[1,2],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=t,Behavior according to dictionary=vary")
if(paN_RED[2,2]>0) {grid.text(paN_RED[2,2],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=s,Behavior according to dictionary=vary")
if(paN_RED[3,2]>0) {grid.text(paN_RED[3,2],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=k,Behavior according to dictionary=vary")
if(paN_RED[4,2]>0) {grid.text(paN_RED[4,2],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=b,Behavior according to dictionary=vary")
if(paN_RED[5,2]>0) {grid.text(paN_RED[5,2],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=d,Behavior according to dictionary=vary")
if(paN_RED[6,2]>0) {grid.text(paN_RED[6,2],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=g,Behavior according to dictionary=vary")
if(paN_RED[7,2]>0) {grid.text(paN_RED[7,2],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=glottal,Behavior according to dictionary=vary")
if(paN_RED[8,2]>0) {grid.text(paN_RED[8,2],gp=gpar(col="black"))}

seekViewport("cell:Stem-initial obstruent=p,Behavior according to dictionary=subst")
if(paN_RED[1,3]>0) {grid.text(paN_RED[1,3],gp=gpar(col="white"))}
seekViewport("cell:Stem-initial obstruent=t,Behavior according to dictionary=subst")
if(paN_RED[2,3]>0) {grid.text(paN_RED[2,3],gp=gpar(col="white"))}
seekViewport("cell:Stem-initial obstruent=s,Behavior according to dictionary=subst")
if(paN_RED[3,3]>0) {grid.text(paN_RED[3,3],gp=gpar(col="white"))}
seekViewport("cell:Stem-initial obstruent=k,Behavior according to dictionary=subst")
if(paN_RED[4,3]>0) {grid.text(paN_RED[4,3],gp=gpar(col="white"))}
seekViewport("cell:Stem-initial obstruent=b,Behavior according to dictionary=subst")
if(paN_RED[5,3]>0) {grid.text(paN_RED[5,3],gp=gpar(col="white"))}
seekViewport("cell:Stem-initial obstruent=d,Behavior according to dictionary=subst")
if(paN_RED[6,3]>0) {grid.text(paN_RED[6,3],gp=gpar(col="white"))}
seekViewport("cell:Stem-initial obstruent=g,Behavior according to dictionary=subst")
if(paN_RED[7,3]>0) {grid.text(paN_RED[7,3],gp=gpar(col="white"))}
seekViewport("cell:Stem-initial obstruent=glottal,Behavior according to dictionary=subst")
if(paN_RED[8,3]>0) {grid.text(paN_RED[8,3],gp=gpar(col="white"))}

[Figure: mosaic plot for paN-RED- by stem-initial obstruent (chunk mosaicPlotsForEachMorph)]

if (.Platform$OS.type == "windows") windows.options(width=5.6, height=4.48) #windows.options() exists only on Windows builds of R, so guard it

maN_RED<-structure(c(0,0,1,2,20,12,12,6,0,0,0,0,3,0,0,0,18,19,25,20,15,1,0,7),
 .Dim=as.integer(c(8,3)),
 .Dimnames=structure(list(Stem.initial.obstruent=
 c("p","t","s","k","b","d","g","glottal"),
 Subst.behavior=c("unsubst","vary","subst")),
 .Names=c("Stem-initial obstruent",
 "Behavior according to dictionary")),
 class="table")
maN_RED
##                       Behavior according to dictionary
## Stem-initial obstruent unsubst vary subst
##                p             0    0    18
##                t             0    0    19
##                s             1    0    25
##                k             2    0    20
##                b            20    3    15
##                d            12    0     1
##                g            12    0     0
##                glottal       6    0     7
mosaic(maN_RED,direction="v",pop=FALSE,
 gp=gpar(fill=c("white","grey","black"))
 ,labeling_args=list(
rot_labels=c(left=0)
,offset_varnames=c(left=1.5),offset_labels=c(left=0.5)
))

seekViewport("cell:Stem-initial obstruent=p,Behavior according to dictionary=unsubst")
if(maN_RED[1,1]>0) {grid.text(maN_RED[1,1],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=t,Behavior according to dictionary=unsubst")
if(maN_RED[2,1]>0) {grid.text(maN_RED[2,1],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=s,Behavior according to dictionary=unsubst")
if(maN_RED[3,1]>0) {grid.text(maN_RED[3,1],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=k,Behavior according to dictionary=unsubst")
if(maN_RED[4,1]>0) {grid.text(maN_RED[4,1],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=b,Behavior according to dictionary=unsubst")
if(maN_RED[5,1]>0) {grid.text(maN_RED[5,1],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=d,Behavior according to dictionary=unsubst")
if(maN_RED[6,1]>0) {grid.text(maN_RED[6,1],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=g,Behavior according to dictionary=unsubst")
if(maN_RED[7,1]>0) {grid.text(maN_RED[7,1],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=glottal,Behavior according to dictionary=unsubst")
if(maN_RED[8,1]>0) {grid.text(maN_RED[8,1],gp=gpar(col="black"))}

seekViewport("cell:Stem-initial obstruent=p,Behavior according to dictionary=vary")
if(maN_RED[1,2]>0) {grid.text(maN_RED[1,2],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=t,Behavior according to dictionary=vary")
if(maN_RED[2,2]>0) {grid.text(maN_RED[2,2],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=s,Behavior according to dictionary=vary")
if(maN_RED[3,2]>0) {grid.text(maN_RED[3,2],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=k,Behavior according to dictionary=vary")
if(maN_RED[4,2]>0) {grid.text(maN_RED[4,2],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=b,Behavior according to dictionary=vary")
if(maN_RED[5,2]>0) {grid.text(maN_RED[5,2],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=d,Behavior according to dictionary=vary")
if(maN_RED[6,2]>0) {grid.text(maN_RED[6,2],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=g,Behavior according to dictionary=vary")
if(maN_RED[7,2]>0) {grid.text(maN_RED[7,2],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=glottal,Behavior according to dictionary=vary")
if(maN_RED[8,2]>0) {grid.text(maN_RED[8,2],gp=gpar(col="black"))}

seekViewport("cell:Stem-initial obstruent=p,Behavior according to dictionary=subst")
if(maN_RED[1,3]>0) {grid.text(maN_RED[1,3],gp=gpar(col="white"))}
seekViewport("cell:Stem-initial obstruent=t,Behavior according to dictionary=subst")
if(maN_RED[2,3]>0) {grid.text(maN_RED[2,3],gp=gpar(col="white"))}
seekViewport("cell:Stem-initial obstruent=s,Behavior according to dictionary=subst")
if(maN_RED[3,3]>0) {grid.text(maN_RED[3,3],gp=gpar(col="white"))}
seekViewport("cell:Stem-initial obstruent=k,Behavior according to dictionary=subst")
if(maN_RED[4,3]>0) {grid.text(maN_RED[4,3],gp=gpar(col="white"))}
seekViewport("cell:Stem-initial obstruent=b,Behavior according to dictionary=subst")
if(maN_RED[5,3]>0) {grid.text(maN_RED[5,3],gp=gpar(col="white"))}
seekViewport("cell:Stem-initial obstruent=d,Behavior according to dictionary=subst")
if(maN_RED[6,3]>0) {grid.text(maN_RED[6,3],gp=gpar(col="white"))}
seekViewport("cell:Stem-initial obstruent=g,Behavior according to dictionary=subst")
if(maN_RED[7,3]>0) {grid.text(maN_RED[7,3],gp=gpar(col="white"))}
seekViewport("cell:Stem-initial obstruent=glottal,Behavior according to dictionary=subst")
if(maN_RED[8,3]>0) {grid.text(maN_RED[8,3],gp=gpar(col="white"))}

[Figure: mosaic plot for maN-RED- by stem-initial obstruent (chunk mosaicPlotsForEachMorph)]

maN_adv<-structure(c(0,0,0,0,6,12,9,21,0,0,0,0,0,0,0,0,11,12,39,11,6,0,0,4),
 .Dim=as.integer(c(8,3)),
 .Dimnames=structure(list(Stem.initial.obstruent=
 c("p","t","s","k","b","d","g","glottal"),
 Subst.behavior=c("unsubst","vary","subst")),
 .Names=c("Stem-initial obstruent",
 "Behavior according to dictionary")),
 class="table")
maN_adv
##                       Behavior according to dictionary
## Stem-initial obstruent unsubst vary subst
##                p             0    0    11
##                t             0    0    12
##                s             0    0    39
##                k             0    0    11
##                b             6    0     6
##                d            12    0     0
##                g             9    0     0
##                glottal      21    0     4
mosaic(maN_adv,direction="v",pop=FALSE,
 gp=gpar(fill=c("white","grey","black"))
 ,labeling_args=list(
rot_labels=c(left=0)
,offset_varnames=c(left=1.5),offset_labels=c(left=0.5)
))

seekViewport("cell:Stem-initial obstruent=p,Behavior according to dictionary=unsubst")
if(maN_adv[1,1]>0) {grid.text(maN_adv[1,1],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=t,Behavior according to dictionary=unsubst")
if(maN_adv[2,1]>0) {grid.text(maN_adv[2,1],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=s,Behavior according to dictionary=unsubst")
if(maN_adv[3,1]>0) {grid.text(maN_adv[3,1],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=k,Behavior according to dictionary=unsubst")
if(maN_adv[4,1]>0) {grid.text(maN_adv[4,1],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=b,Behavior according to dictionary=unsubst")
if(maN_adv[5,1]>0) {grid.text(maN_adv[5,1],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=d,Behavior according to dictionary=unsubst")
if(maN_adv[6,1]>0) {grid.text(maN_adv[6,1],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=g,Behavior according to dictionary=unsubst")
if(maN_adv[7,1]>0) {grid.text(maN_adv[7,1],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=glottal,Behavior according to dictionary=unsubst")
if(maN_adv[8,1]>0) {grid.text(maN_adv[8,1],gp=gpar(col="black"))}

seekViewport("cell:Stem-initial obstruent=p,Behavior according to dictionary=vary")
if(maN_adv[1,2]>0) {grid.text(maN_adv[1,2],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=t,Behavior according to dictionary=vary")
if(maN_adv[2,2]>0) {grid.text(maN_adv[2,2],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=s,Behavior according to dictionary=vary")
if(maN_adv[3,2]>0) {grid.text(maN_adv[3,2],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=k,Behavior according to dictionary=vary")
if(maN_adv[4,2]>0) {grid.text(maN_adv[4,2],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=b,Behavior according to dictionary=vary")
if(maN_adv[5,2]>0) {grid.text(maN_adv[5,2],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=d,Behavior according to dictionary=vary")
if(maN_adv[6,2]>0) {grid.text(maN_adv[6,2],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=g,Behavior according to dictionary=vary")
if(maN_adv[7,2]>0) {grid.text(maN_adv[7,2],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=glottal,Behavior according to dictionary=vary")
if(maN_adv[8,2]>0) {grid.text(maN_adv[8,2],gp=gpar(col="black"))}

seekViewport("cell:Stem-initial obstruent=p,Behavior according to dictionary=subst")
if(maN_adv[1,3]>0) {grid.text(maN_adv[1,3],gp=gpar(col="white"))}
seekViewport("cell:Stem-initial obstruent=t,Behavior according to dictionary=subst")
if(maN_adv[2,3]>0) {grid.text(maN_adv[2,3],gp=gpar(col="white"))}
seekViewport("cell:Stem-initial obstruent=s,Behavior according to dictionary=subst")
if(maN_adv[3,3]>0) {grid.text(maN_adv[3,3],gp=gpar(col="white"))}
seekViewport("cell:Stem-initial obstruent=k,Behavior according to dictionary=subst")
if(maN_adv[4,3]>0) {grid.text(maN_adv[4,3],gp=gpar(col="white"))}
seekViewport("cell:Stem-initial obstruent=b,Behavior according to dictionary=subst")
if(maN_adv[5,3]>0) {grid.text(maN_adv[5,3],gp=gpar(col="white"))}
seekViewport("cell:Stem-initial obstruent=d,Behavior according to dictionary=subst")
if(maN_adv[6,3]>0) {grid.text(maN_adv[6,3],gp=gpar(col="white"))}
seekViewport("cell:Stem-initial obstruent=g,Behavior according to dictionary=subst")
if(maN_adv[7,3]>0) {grid.text(maN_adv[7,3],gp=gpar(col="white"))}
seekViewport("cell:Stem-initial obstruent=glottal,Behavior according to dictionary=subst")
if(maN_adv[8,3]>0) {grid.text(maN_adv[8,3],gp=gpar(col="white"))}

[Figure: mosaic plot for maN- adv. by stem-initial obstruent (chunk mosaicPlotsForEachMorph)]

maN_other<-structure(c(0,0,0,0,6,3,13,4,0,0,0,1,0,0,0,0,65,39,32,74,66,7,0,34),
 .Dim=as.integer(c(8,3)),
 .Dimnames=structure(list(Stem.initial.obstruent=
 c("p","t","s","k","b","d","g","glottal"),
 Subst.behavior=c("unsubst","vary","subst")),
 .Names=c("Stem-initial obstruent",
 "Behavior according to dictionary")),
 class="table")
maN_other
##                       Behavior according to dictionary
## Stem-initial obstruent unsubst vary subst
##                p             0    0    65
##                t             0    0    39
##                s             0    0    32
##                k             0    1    74
##                b             6    0    66
##                d             3    0     7
##                g            13    0     0
##                glottal       4    0    34
mosaic(maN_other,direction="v",pop=FALSE,
 gp=gpar(fill=c("white","grey","black"))
 ,labeling_args=list(
rot_labels=c(left=0)
,gp_labels=gpar(fontsize=10)
,offset_varnames=c(left=1.5),offset_labels=c(left=0.5)
))

seekViewport("cell:Stem-initial obstruent=p,Behavior according to dictionary=unsubst")
if(maN_other[1,1]>0) {grid.text(maN_other[1,1],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=t,Behavior according to dictionary=unsubst")
if(maN_other[2,1]>0) {grid.text(maN_other[2,1],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=s,Behavior according to dictionary=unsubst")
if(maN_other[3,1]>0) {grid.text(maN_other[3,1],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=k,Behavior according to dictionary=unsubst")
if(maN_other[4,1]>0) {grid.text(maN_other[4,1],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=b,Behavior according to dictionary=unsubst")
if(maN_other[5,1]>0) {grid.text(maN_other[5,1],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=d,Behavior according to dictionary=unsubst")
if(maN_other[6,1]>0) {grid.text(maN_other[6,1],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=g,Behavior according to dictionary=unsubst")
if(maN_other[7,1]>0) {grid.text(maN_other[7,1],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=glottal,Behavior according to dictionary=unsubst")
if(maN_other[8,1]>0) {grid.text(maN_other[8,1],gp=gpar(col="black"))}

seekViewport("cell:Stem-initial obstruent=p,Behavior according to dictionary=vary")
if(maN_other[1,2]>0) {grid.text(maN_other[1,2],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=t,Behavior according to dictionary=vary")
if(maN_other[2,2]>0) {grid.text(maN_other[2,2],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=s,Behavior according to dictionary=vary")
if(maN_other[3,2]>0) {grid.text(maN_other[3,2],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=k,Behavior according to dictionary=vary")
if(maN_other[4,2]>0) {grid.text(maN_other[4,2],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=b,Behavior according to dictionary=vary")
if(maN_other[5,2]>0) {grid.text(maN_other[5,2],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=d,Behavior according to dictionary=vary")
if(maN_other[6,2]>0) {grid.text(maN_other[6,2],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=g,Behavior according to dictionary=vary")
if(maN_other[7,2]>0) {grid.text(maN_other[7,2],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=glottal,Behavior according to dictionary=vary")
if(maN_other[8,2]>0) {grid.text(maN_other[8,2],gp=gpar(col="black"))}

seekViewport("cell:Stem-initial obstruent=p,Behavior according to dictionary=subst")
if(maN_other[1,3]>0) {grid.text(maN_other[1,3],gp=gpar(col="white"))}
seekViewport("cell:Stem-initial obstruent=t,Behavior according to dictionary=subst")
if(maN_other[2,3]>0) {grid.text(maN_other[2,3],gp=gpar(col="white"))}
seekViewport("cell:Stem-initial obstruent=s,Behavior according to dictionary=subst")
if(maN_other[3,3]>0) {grid.text(maN_other[3,3],gp=gpar(col="white"))}
seekViewport("cell:Stem-initial obstruent=k,Behavior according to dictionary=subst")
if(maN_other[4,3]>0) {grid.text(maN_other[4,3],gp=gpar(col="white"))}
seekViewport("cell:Stem-initial obstruent=b,Behavior according to dictionary=subst")
if(maN_other[5,3]>0) {grid.text(maN_other[5,3],gp=gpar(col="white"))}
seekViewport("cell:Stem-initial obstruent=d,Behavior according to dictionary=subst")
if(maN_other[6,3]>0) {grid.text(maN_other[6,3],gp=gpar(col="white"))}
seekViewport("cell:Stem-initial obstruent=g,Behavior according to dictionary=subst")
if(maN_other[7,3]>0) {grid.text(maN_other[7,3],gp=gpar(col="white"))}
seekViewport("cell:Stem-initial obstruent=glottal,Behavior according to dictionary=subst")
if(maN_other[8,3]>0) {grid.text(maN_other[8,3],gp=gpar(col="white"))}

[Figure: mosaic plot for maN- other by stem-initial obstruent (chunk mosaicPlotsForEachMorph)]

paN_noun<-structure(c(3,8,6,7,26,17,27,22,7,18,13,5,8,0,0,0,27,25,20,7,11,1,0,6),
 .Dim=as.integer(c(8,3)),
 .Dimnames=structure(list(Stem.initial.obstruent=
 c("p","t","s","k","b","d","g","glottal"),
 Subst.behavior=c("unsubst","vary","subst")),
 .Names=c("Stem-initial obstruent",
 "Behavior according to dictionary")),
 class="table")
paN_noun
##                       Behavior according to dictionary
## Stem-initial obstruent unsubst vary subst
##                p             3    7    27
##                t             8   18    25
##                s             6   13    20
##                k             7    5     7
##                b            26    8    11
##                d            17    0     1
##                g            27    0     0
##                glottal      22    0     6
mosaic(paN_noun,direction="v",pop=FALSE,
 gp=gpar(fill=c("white","grey","black"))
 ,labeling_args=list(
rot_labels=c(left=0)
,offset_varnames=c(left=1.5),offset_labels=c(left=0.5)
))

seekViewport("cell:Stem-initial obstruent=p,Behavior according to dictionary=unsubst")
if(paN_noun[1,1]>0) {grid.text(paN_noun[1,1],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=t,Behavior according to dictionary=unsubst")
if(paN_noun[2,1]>0) {grid.text(paN_noun[2,1],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=s,Behavior according to dictionary=unsubst")
if(paN_noun[3,1]>0) {grid.text(paN_noun[3,1],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=k,Behavior according to dictionary=unsubst")
if(paN_noun[4,1]>0) {grid.text(paN_noun[4,1],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=b,Behavior according to dictionary=unsubst")
if(paN_noun[5,1]>0) {grid.text(paN_noun[5,1],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=d,Behavior according to dictionary=unsubst")
if(paN_noun[6,1]>0) {grid.text(paN_noun[6,1],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=g,Behavior according to dictionary=unsubst")
if(paN_noun[7,1]>0) {grid.text(paN_noun[7,1],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=glottal,Behavior according to dictionary=unsubst")
if(paN_noun[8,1]>0) {grid.text(paN_noun[8,1],gp=gpar(col="black"))}

seekViewport("cell:Stem-initial obstruent=p,Behavior according to dictionary=vary")
if(paN_noun[1,2]>0) {grid.text(paN_noun[1,2],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=t,Behavior according to dictionary=vary")
if(paN_noun[2,2]>0) {grid.text(paN_noun[2,2],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=s,Behavior according to dictionary=vary")
if(paN_noun[3,2]>0) {grid.text(paN_noun[3,2],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=k,Behavior according to dictionary=vary")
if(paN_noun[4,2]>0) {grid.text(paN_noun[4,2],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=b,Behavior according to dictionary=vary")
if(paN_noun[5,2]>0) {grid.text(paN_noun[5,2],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=d,Behavior according to dictionary=vary")
if(paN_noun[6,2]>0) {grid.text(paN_noun[6,2],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=g,Behavior according to dictionary=vary")
if(paN_noun[7,2]>0) {grid.text(paN_noun[7,2],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=glottal,Behavior according to dictionary=vary")
if(paN_noun[8,2]>0) {grid.text(paN_noun[8,2],gp=gpar(col="black"))}

seekViewport("cell:Stem-initial obstruent=p,Behavior according to dictionary=subst")
if(paN_noun[1,3]>0) {grid.text(paN_noun[1,3],gp=gpar(col="white"))}
seekViewport("cell:Stem-initial obstruent=t,Behavior according to dictionary=subst")
if(paN_noun[2,3]>0) {grid.text(paN_noun[2,3],gp=gpar(col="white"))}
seekViewport("cell:Stem-initial obstruent=s,Behavior according to dictionary=subst")
if(paN_noun[3,3]>0) {grid.text(paN_noun[3,3],gp=gpar(col="white"))}
seekViewport("cell:Stem-initial obstruent=k,Behavior according to dictionary=subst")
if(paN_noun[4,3]>0) {grid.text(paN_noun[4,3],gp=gpar(col="white"))}
seekViewport("cell:Stem-initial obstruent=b,Behavior according to dictionary=subst")
if(paN_noun[5,3]>0) {grid.text(paN_noun[5,3],gp=gpar(col="white"))}
seekViewport("cell:Stem-initial obstruent=d,Behavior according to dictionary=subst")
if(paN_noun[6,3]>0) {grid.text(paN_noun[6,3],gp=gpar(col="white"))}
seekViewport("cell:Stem-initial obstruent=g,Behavior according to dictionary=subst")
if(paN_noun[7,3]>0) {grid.text(paN_noun[7,3],gp=gpar(col="white"))}
seekViewport("cell:Stem-initial obstruent=glottal,Behavior according to dictionary=subst")
if(paN_noun[8,3]>0) {grid.text(paN_noun[8,3],gp=gpar(col="white"))}

[Figure: mosaic plot for paN- noun by stem-initial obstruent (chunk mosaicPlotsForEachMorph)]

paN_res<-structure(c(3,5,5,5,17,7,4,5,3,3,3,0,1,0,0,0,3,2,0,2,1,0,0,0),
 .Dim=as.integer(c(8,3)),
 .Dimnames=structure(list(Stem.initial.obstruent=
 c("p","t","s","k","b","d","g","glottal"),
 Subst.behavior=c("unsubst","vary","subst")),
 .Names=c("Stem-initial obstruent",
 "Behavior according to dictionary")),
 class="table")
paN_res
##                       Behavior according to dictionary
## Stem-initial obstruent unsubst vary subst
##                p             3    3     3
##                t             5    3     2
##                s             5    3     0
##                k             5    0     2
##                b            17    1     1
##                d             7    0     0
##                g             4    0     0
##                glottal       5    0     0
mosaic(paN_res,direction="v",pop=FALSE,
 gp=gpar(fill=c("white","grey","black"))
 ,labeling_args=list(
rot_labels=c(left=0)
,offset_varnames=c(left=1.5),offset_labels=c(left=0.5)
))

seekViewport("cell:Stem-initial obstruent=p,Behavior according to dictionary=unsubst")
if(paN_res[1,1]>0) {grid.text(paN_res[1,1],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=t,Behavior according to dictionary=unsubst")
if(paN_res[2,1]>0) {grid.text(paN_res[2,1],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=s,Behavior according to dictionary=unsubst")
if(paN_res[3,1]>0) {grid.text(paN_res[3,1],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=k,Behavior according to dictionary=unsubst")
if(paN_res[4,1]>0) {grid.text(paN_res[4,1],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=b,Behavior according to dictionary=unsubst")
if(paN_res[5,1]>0) {grid.text(paN_res[5,1],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=d,Behavior according to dictionary=unsubst")
if(paN_res[6,1]>0) {grid.text(paN_res[6,1],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=g,Behavior according to dictionary=unsubst")
if(paN_res[7,1]>0) {grid.text(paN_res[7,1],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=glottal,Behavior according to dictionary=unsubst")
if(paN_res[8,1]>0) {grid.text(paN_res[8,1],gp=gpar(col="black"))}

seekViewport("cell:Stem-initial obstruent=p,Behavior according to dictionary=vary")
if(paN_res[1,2]>0) {grid.text(paN_res[1,2],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=t,Behavior according to dictionary=vary")
if(paN_res[2,2]>0) {grid.text(paN_res[2,2],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=s,Behavior according to dictionary=vary")
if(paN_res[3,2]>0) {grid.text(paN_res[3,2],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=k,Behavior according to dictionary=vary")
if(paN_res[4,2]>0) {grid.text(paN_res[4,2],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=b,Behavior according to dictionary=vary")
if(paN_res[5,2]>0) {grid.text(paN_res[5,2],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=d,Behavior according to dictionary=vary")
if(paN_res[6,2]>0) {grid.text(paN_res[6,2],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=g,Behavior according to dictionary=vary")
if(paN_res[7,2]>0) {grid.text(paN_res[7,2],gp=gpar(col="black"))}
seekViewport("cell:Stem-initial obstruent=glottal,Behavior according to dictionary=vary")
if(paN_res[8,2]>0) {grid.text(paN_res[8,2],gp=gpar(col="black"))}

seekViewport("cell:Stem-initial obstruent=p,Behavior according to dictionary=subst")
if(paN_res[1,3]>0) {grid.text(paN_res[1,3],gp=gpar(col="white"))}
seekViewport("cell:Stem-initial obstruent=t,Behavior according to dictionary=subst")
if(paN_res[2,3]>0) {grid.text(paN_res[2,3],gp=gpar(col="white"))}
seekViewport("cell:Stem-initial obstruent=s,Behavior according to dictionary=subst")
if(paN_res[3,3]>0) {grid.text(paN_res[3,3],gp=gpar(col="white"))}
seekViewport("cell:Stem-initial obstruent=k,Behavior according to dictionary=subst")
if(paN_res[4,3]>0) {grid.text(paN_res[4,3],gp=gpar(col="white"))}
seekViewport("cell:Stem-initial obstruent=b,Behavior according to dictionary=subst")
if(paN_res[5,3]>0) {grid.text(paN_res[5,3],gp=gpar(col="white"))}
seekViewport("cell:Stem-initial obstruent=d,Behavior according to dictionary=subst")
if(paN_res[6,3]>0) {grid.text(paN_res[6,3],gp=gpar(col="white"))}
seekViewport("cell:Stem-initial obstruent=g,Behavior according to dictionary=subst")
if(paN_res[7,3]>0) {grid.text(paN_res[7,3],gp=gpar(col="white"))}
seekViewport("cell:Stem-initial obstruent=glottal,Behavior according to dictionary=subst")

plot of chunk mosaicPlotsForEachMorph

if(paN_res[8,3]>0) {grid.text(paN_res[8,3],gp=gpar(col="white"))}
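
The six blocks above differ only in which table they plot. A loop like the following (a sketch, not part of the original script, reusing the hypothetical addTileNumbers() helper sketched earlier) would draw the same six figures, one per iteration:

#Sketch: draw all six per-affix mosaics in a loop, one figure per iteration
per_affix <- list(paN_RED, maN_RED, maN_adv, maN_other, paN_noun, paN_res)
for (tab in per_affix) {
  mosaic(tab, direction="v", pop=FALSE,
    gp=gpar(fill=c("white","grey","black")),
    labeling_args=list(rot_labels=c(left=0),
      offset_varnames=c(left=1.5), offset_labels=c(left=0.5)))
  addTileNumbers(tab, "Stem-initial obstruent",
    "Behavior according to dictionary", c("black","black","white"))
}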

Plot the observed data

par(mar=c(5,5,2,6))
suppressWarnings(with(nas_sub, {
  interaction.plot(x.factor=consonant, 
  trace.factor=morph_unicode, 
  response=subst_rate,
  xlab="stem-initial consonant",ylab="rate of nasal substitution"
  , trace.label="prefix construction", fixed=TRUE, type="b", pch=c(0,1,2,20,18,15))
}))

[Figure: interaction plot of nasal substitution rate by stem-initial consonant and prefix construction (chunk plotObserved)]

#Since this will appear in the paper, also do a PNG file:
png(file="Tagalog_interaction_plot.png",width=myResMultiplier*460,height=myResMultiplier*300,res=myResMultiplier*72, family=myFontFamily)
par(mar=c(5,4,2,0)+0.1)
with(nas_sub, {
  interaction.plot(x.factor=consonant, 
  trace.factor=morph_unicode, 
  response=subst_rate,
  xlab="stem-initial consonant",ylab="rate of nasal substitution"
  , trace.label="prefix construction", fixed=TRUE, type="b", pch=c(0,1,2,20,18,15))
})
dev.off()
## pdf 
##   2
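
If a vector version of the same figure is wanted, something along these lines should work (a sketch, not part of the original script; the file name and dimensions here are made up, and cairo_pdf() is chosen because the trace labels contain Unicode characters):

#Sketch: vector (PDF) version of the interaction plot
cairo_pdf(filename="Tagalog_interaction_plot.pdf", width=6.4, height=4.2, family=myFontFamily)
par(mar=c(5,4,2,0)+0.1)
with(nas_sub, {
  interaction.plot(x.factor=consonant, 
  trace.factor=morph_unicode, 
  response=subst_rate,
  xlab="stem-initial consonant",ylab="rate of nasal substitution"
  , trace.label="prefix construction", fixed=TRUE, type="b", pch=c(0,1,2,20,18,15))
})
dev.off()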

Regression model

This isn’t used in the paper; it is just a check that consonant and morphology are indeed significant predictors:

nas_sub.glm <- glm(cbind(subst_count, unsubst_count) ~ consonant + morph, data=nas_sub, family=binomial()) #will throw a warning because of the 0.5 values
## Warning: non-integer counts in a binomial glm!
summary(nas_sub.glm)
## 
## Call:
## glm(formula = cbind(subst_count, unsubst_count) ~ consonant + 
##     morph, family = binomial(), data = nas_sub)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.8187  -0.4713  -0.0498   0.8885   2.4389  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       5.999      0.544   11.03  < 2e-16 ***
## consonantt/s     -0.786      0.402   -1.95   0.0507 .  
## consonantk       -1.368      0.479   -2.86   0.0043 ** 
## consonantb       -3.259      0.434   -7.52  5.6e-14 ***
## consonantd       -5.766      0.568  -10.15  < 2e-16 ***
## consonantg       -9.113      1.117   -8.16  3.4e-16 ***
## morphpaN-RED-    -0.869      0.492   -1.77   0.0775 .  
## morphmaN- adv.   -2.146      0.517   -4.15  3.3e-05 ***
## morphmaN-RED-    -2.648      0.444   -5.97  2.4e-09 ***
## morphpaN- noun   -4.248      0.438   -9.69  < 2e-16 ***
## morphpaN- res.   -5.910      0.546  -10.82  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 753.400  on 35  degrees of freedom
## Residual deviance:  33.584  on 25  degrees of freedom
## AIC: 116.7
## 
## Number of Fisher Scoring iterations: 6
Anova(nas_sub.glm)
## Warning: non-integer #successes in a binomial glm!
## Warning: non-integer #successes in a binomial glm!
## Analysis of Deviance Table (Type II tests)
## 
## Response: cbind(subst_count, unsubst_count)
##           LR Chisq Df Pr(>Chisq)    
## consonant      493  5     <2e-16 ***
## morph          283  5     <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
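
To read the coefficients on the probability scale, the fitted log-odds for any consonant/construction cell can be converted into a predicted substitution rate; a minimal sketch (not part of the original script; the cell chosen here is arbitrary):

#Sketch: predicted substitution rate for one cell, converting the model's log-odds to a probability
newcell <- data.frame(consonant=factor("b", levels=levels(nas_sub$consonant)),
  morph=factor("paN- noun", levels=levels(nas_sub$morph)))
predict(nas_sub.glm, newdata=newcell, type="response")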

Which Cs are significantly different?

nas_sub.glht.C <- glht(nas_sub.glm, linfct = mcp(consonant = "Tukey"))
summary(nas_sub.glht.C) #I.e., all are different except p vs. t/s, t/s vs. k
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: glm(formula = cbind(subst_count, unsubst_count) ~ consonant + 
##     morph, family = binomial(), data = nas_sub)
## 
## Linear Hypotheses:
##              Estimate Std. Error z value Pr(>|z|)    
## t/s - p == 0   -0.786      0.402   -1.95    0.337    
## k - p == 0     -1.368      0.479   -2.86    0.041 *  
## b - p == 0     -3.259      0.434   -7.52   <0.001 ***
## d - p == 0     -5.766      0.568  -10.15   <0.001 ***
## g - p == 0     -9.113      1.117   -8.16   <0.001 ***
## k - t/s == 0   -0.583      0.370   -1.57    0.583    
## b - t/s == 0   -2.474      0.303   -8.18   <0.001 ***
## d - t/s == 0   -4.980      0.474  -10.50   <0.001 ***
## g - t/s == 0   -8.327      1.073   -7.76   <0.001 ***
## b - k == 0     -1.891      0.376   -5.03   <0.001 ***
## d - k == 0     -4.398      0.519   -8.48   <0.001 ***
## g - k == 0     -7.745      1.092   -7.09   <0.001 ***
## d - b == 0     -2.507      0.432   -5.80   <0.001 ***
## g - b == 0     -5.854      1.051   -5.57   <0.001 ***
## g - d == 0     -3.347      1.084   -3.09    0.021 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)

Which prefixes are significantly different?

nas_sub.glht.morph <- glht(nas_sub.glm, linfct = mcp(morph = "Tukey"))
summary(nas_sub.glht.morph) #I.e., all are different except paN-RED vs. maN- other, maN- adv vs. paN-RED-, maN- adv vs. maN-RED-
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: glm(formula = cbind(subst_count, unsubst_count) ~ consonant + 
##     morph, family = binomial(), data = nas_sub)
## 
## Linear Hypotheses:
##                             Estimate Std. Error z value Pr(>|z|)    
## paN-RED- - maN- other == 0    -0.869      0.492   -1.77     0.48    
## maN- adv. - maN- other == 0   -2.146      0.517   -4.15   <0.001 ***
## maN-RED- - maN- other == 0    -2.648      0.444   -5.97   <0.001 ***
## paN- noun - maN- other == 0   -4.248      0.438   -9.69   <0.001 ***
## paN- res. - maN- other == 0   -5.910      0.546  -10.82   <0.001 ***
## maN- adv. - paN-RED- == 0     -1.277      0.514   -2.49     0.12    
## maN-RED- - paN-RED- == 0      -1.779      0.440   -4.05   <0.001 ***
## paN- noun - paN-RED- == 0     -3.380      0.431   -7.85   <0.001 ***
## paN- res. - paN-RED- == 0     -5.042      0.539   -9.35   <0.001 ***
## maN-RED- - maN- adv. == 0     -0.502      0.449   -1.12     0.87    
## paN- noun - maN- adv. == 0    -2.103      0.423   -4.97   <0.001 ***
## paN- res. - maN- adv. == 0    -3.765      0.531   -7.09   <0.001 ***
## paN- noun - maN-RED- == 0     -1.600      0.325   -4.92   <0.001 ***
## paN- res. - maN-RED- == 0     -3.263      0.458   -7.12   <0.001 ***
## paN- res. - paN- noun == 0    -1.662      0.393   -4.23   <0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
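
A compact letter display is a convenient summary of these pairwise comparisons; a sketch (not part of the original script) using cld() from the already-loaded multcomp package, where levels that share a letter are not significantly different:

#Sketch: compact letter displays for the two sets of Tukey comparisons
cld(nas_sub.glht.C)
cld(nas_sub.glht.morph)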

Another way to do it is to use the weights argument for “case weights” (see https://stat.ethz.ch/pipermail/r-help/2007-March/126891.html), but it requires separate rows for substituted and unsubstituted forms:

#The following could be made from nas_sub, which would be more elegant than reading it in from a file:
tag <- read.table("TagalogRatesTable.txt", header=TRUE, sep="\t", stringsAsFactors=TRUE) #factors needed for the glm below; R >= 4.0 no longer converts strings by default
tag #varying words are treated as separate type
##         C     voice place     morph outcome freq
## 1       p voiceless   lab   paN_RED  0unsub    0
## 2       p voiceless   lab   paN_RED    vary    0
## 3       p voiceless   lab   paN_RED    1sub   39
## 4       t voiceless   cor   paN_RED  0unsub    0
## 5       t voiceless   cor   paN_RED    vary    0
## 6       t voiceless   cor   paN_RED    1sub   22
## 7       s voiceless   cor   paN_RED  0unsub    0
## 8       s voiceless   cor   paN_RED    vary    0
## 9       s voiceless   cor   paN_RED    1sub   41
## 10      k voiceless   vel   paN_RED  0unsub    1
## 11      k voiceless   vel   paN_RED    vary    0
## 12      k voiceless   vel   paN_RED    1sub   25
## 13      b    voiced   lab   paN_RED  0unsub    5
## 14      b    voiced   lab   paN_RED    vary    1
## 15      b    voiced   lab   paN_RED    1sub   29
## 16      d    voiced   cor   paN_RED  0unsub    7
## 17      d    voiced   cor   paN_RED    vary    0
## 18      d    voiced   cor   paN_RED    1sub    3
## 19      g    voiced   vel   paN_RED  0unsub   17
## 20      g    voiced   vel   paN_RED    vary    0
## 21      g    voiced   vel   paN_RED    1sub    1
## 22  glott      <NA>  <NA>   paN_RED  0unsub    7
## 23  glott      <NA>  <NA>   paN_RED    vary    0
## 24  glott      <NA>  <NA>   paN_RED    1sub   17
## 25      p voiceless   lab   maN_RED  0unsub    0
## 26      p voiceless   lab   maN_RED    vary    0
## 27      p voiceless   lab   maN_RED    1sub   18
## 28      t voiceless   cor   maN_RED  0unsub    0
## 29      t voiceless   cor   maN_RED    vary    0
## 30      t voiceless   cor   maN_RED    1sub   19
## 31      s voiceless   cor   maN_RED  0unsub    1
## 32      s voiceless   cor   maN_RED    vary    0
## 33      s voiceless   cor   maN_RED    1sub   25
## 34      k voiceless   vel   maN_RED  0unsub    2
## 35      k voiceless   vel   maN_RED    vary    0
## 36      k voiceless   vel   maN_RED    1sub   20
## 37      b    voiced   lab   maN_RED  0unsub   20
## 38      b    voiced   lab   maN_RED    vary    3
## 39      b    voiced   lab   maN_RED    1sub   15
## 40      d    voiced   cor   maN_RED  0unsub   12
## 41      d    voiced   cor   maN_RED    vary    0
## 42      d    voiced   cor   maN_RED    1sub    1
## 43      g    voiced   vel   maN_RED  0unsub   12
## 44      g    voiced   vel   maN_RED    vary    0
## 45      g    voiced   vel   maN_RED    1sub    0
## 46  glott      <NA>  <NA>   maN_RED  0unsub    6
## 47  glott      <NA>  <NA>   maN_RED    vary    0
## 48  glott      <NA>  <NA>   maN_RED    1sub    7
## 49      p voiceless   lab   maN_adv  0unsub    0
## 50      p voiceless   lab   maN_adv    vary    0
## 51      p voiceless   lab   maN_adv    1sub   11
## 52      t voiceless   cor   maN_adv  0unsub    0
## 53      t voiceless   cor   maN_adv    vary    0
## 54      t voiceless   cor   maN_adv    1sub   12
## 55      s voiceless   cor   maN_adv  0unsub    0
## 56      s voiceless   cor   maN_adv    vary    0
## 57      s voiceless   cor   maN_adv    1sub   39
## 58      k voiceless   vel   maN_adv  0unsub    0
## 59      k voiceless   vel   maN_adv    vary    0
## 60      k voiceless   vel   maN_adv    1sub   11
## 61      b    voiced   lab   maN_adv  0unsub    6
## 62      b    voiced   lab   maN_adv    vary    0
## 63      b    voiced   lab   maN_adv    1sub    6
## 64      d    voiced   cor   maN_adv  0unsub   12
## 65      d    voiced   cor   maN_adv    vary    0
## 66      d    voiced   cor   maN_adv    1sub    0
## 67      g    voiced   vel   maN_adv  0unsub    9
## 68      g    voiced   vel   maN_adv    vary    0
## 69      g    voiced   vel   maN_adv    1sub    0
## 70  glott      <NA>  <NA>   maN_adv  0unsub   21
## 71  glott      <NA>  <NA>   maN_adv    vary    0
## 72  glott      <NA>  <NA>   maN_adv    1sub    4
## 73      p voiceless   lab maN_other  0unsub    0
## 74      p voiceless   lab maN_other    vary    0
## 75      p voiceless   lab maN_other    1sub   65
## 76      t voiceless   cor maN_other  0unsub    0
## 77      t voiceless   cor maN_other    vary    0
## 78      t voiceless   cor maN_other    1sub   39
## 79      s voiceless   cor maN_other  0unsub    0
## 80      s voiceless   cor maN_other    vary    0
## 81      s voiceless   cor maN_other    1sub   32
## 82      k voiceless   vel maN_other  0unsub    0
## 83      k voiceless   vel maN_other    vary    1
## 84      k voiceless   vel maN_other    1sub   74
## 85      b    voiced   lab maN_other  0unsub    6
## 86      b    voiced   lab maN_other    vary    0
## 87      b    voiced   lab maN_other    1sub   66
## 88      d    voiced   cor maN_other  0unsub    3
## 89      d    voiced   cor maN_other    vary    0
## 90      d    voiced   cor maN_other    1sub    7
## 91      g    voiced   vel maN_other  0unsub   13
## 92      g    voiced   vel maN_other    vary    0
## 93      g    voiced   vel maN_other    1sub    0
## 94  glott      <NA>  <NA> maN_other  0unsub    4
## 95  glott      <NA>  <NA> maN_other    vary    0
## 96  glott      <NA>  <NA> maN_other    1sub   34
## 97      p voiceless   lab   paN_nom  0unsub    3
## 98      p voiceless   lab   paN_nom    vary    7
## 99      p voiceless   lab   paN_nom    1sub   27
## 100     t voiceless   cor   paN_nom  0unsub    8
## 101     t voiceless   cor   paN_nom    vary   18
## 102     t voiceless   cor   paN_nom    1sub   25
## 103     s voiceless   cor   paN_nom  0unsub    6
## 104     s voiceless   cor   paN_nom    vary   13
## 105     s voiceless   cor   paN_nom    1sub   20
## 106     k voiceless   vel   paN_nom  0unsub    7
## 107     k voiceless   vel   paN_nom    vary    5
## 108     k voiceless   vel   paN_nom    1sub    7
## 109     b    voiced   lab   paN_nom  0unsub   26
## 110     b    voiced   lab   paN_nom    vary    8
## 111     b    voiced   lab   paN_nom    1sub   11
## 112     d    voiced   cor   paN_nom  0unsub   17
## 113     d    voiced   cor   paN_nom    vary    0
## 114     d    voiced   cor   paN_nom    1sub    1
## 115     g    voiced   vel   paN_nom  0unsub   27
## 116     g    voiced   vel   paN_nom    vary    0
## 117     g    voiced   vel   paN_nom    1sub    0
## 118 glott      <NA>  <NA>   paN_nom  0unsub   22
## 119 glott      <NA>  <NA>   paN_nom    vary    0
## 120 glott      <NA>  <NA>   paN_nom    1sub    6
## 121     p voiceless   lab   paN_res  0unsub    3
## 122     p voiceless   lab   paN_res    vary    3
## 123     p voiceless   lab   paN_res    1sub    3
## 124     t voiceless   cor   paN_res  0unsub    5
## 125     t voiceless   cor   paN_res    vary    3
## 126     t voiceless   cor   paN_res    1sub    2
## 127     s voiceless   cor   paN_res  0unsub    5
## 128     s voiceless   cor   paN_res    vary    3
## 129     s voiceless   cor   paN_res    1sub    0
## 130     k voiceless   vel   paN_res  0unsub    5
## 131     k voiceless   vel   paN_res    vary    0
## 132     k voiceless   vel   paN_res    1sub    2
## 133     b    voiced   lab   paN_res  0unsub   17
## 134     b    voiced   lab   paN_res    vary    1
## 135     b    voiced   lab   paN_res    1sub    1
## 136     d    voiced   cor   paN_res  0unsub    7
## 137     d    voiced   cor   paN_res    vary    0
## 138     d    voiced   cor   paN_res    1sub    0
## 139     g    voiced   vel   paN_res  0unsub    4
## 140     g    voiced   vel   paN_res    vary    0
## 141     g    voiced   vel   paN_res    1sub    0
## 142 glott      <NA>  <NA>   paN_res  0unsub    5
## 143 glott      <NA>  <NA>   paN_res    vary    0
## 144 glott      <NA>  <NA>   paN_res    1sub    0
tag.glm <- glm(outcome ~ morph+voice+place, weights = freq, 
  data = subset(tag, tag$outcome != "var"), family = binomial)
summary(tag.glm)
## 
## Call:
## glm(formula = outcome ~ morph + voice + place, family = binomial, 
##     data = subset(tag, tag$outcome != "var"), weights = freq)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -5.876   0.000   0.000   0.889   5.290  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      -1.962      0.458   -4.29  1.8e-05 ***
## morphmaN_other    2.076      0.485    4.28  1.8e-05 ***
## morphmaN_RED     -0.312      0.452   -0.69   0.4908    
## morphpaN_nom     -1.361      0.430   -3.16   0.0016 ** 
## morphpaN_RED      1.245      0.492    2.53   0.0114 *  
## morphpaN_res     -3.617      0.555   -6.52  7.1e-11 ***
## voicevoiceless    5.358      0.371   14.46  < 2e-16 ***
## placelab          2.380      0.346    6.88  6.0e-12 ***
## placevel         -1.411      0.326   -4.33  1.5e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1189.12  on 74  degrees of freedom
## Residual deviance:  504.25  on 66  degrees of freedom
##   (18 observations deleted due to missingness)
## AIC: 522.3
## 
## Number of Fisher Scoring iterations: 5
#Which affixes are pairwise different?
summary(glht(tag.glm, linfct = mcp(morph = "Tukey"))) #all pairs differ except maN-RED- vs. maN- adv, paN-RED- vs. maN- adv, and paN-RED- vs. maN- other
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: glm(formula = outcome ~ morph + voice + place, family = binomial, 
##     data = subset(tag, tag$outcome != "var"), weights = freq)
## 
## Linear Hypotheses:
##                          Estimate Std. Error z value Pr(>|z|)    
## maN_other - maN_adv == 0    2.076      0.485    4.28   <0.001 ***
## maN_RED - maN_adv == 0     -0.312      0.452   -0.69   0.9824    
## paN_nom - maN_adv == 0     -1.361      0.430   -3.16   0.0186 *  
## paN_RED - maN_adv == 0      1.245      0.492    2.53   0.1114    
## paN_res - maN_adv == 0     -3.617      0.555   -6.52   <0.001 ***
## maN_RED - maN_other == 0   -2.388      0.404   -5.91   <0.001 ***
## paN_nom - maN_other == 0   -3.438      0.397   -8.66   <0.001 ***
## paN_RED - maN_other == 0   -0.831      0.432   -1.92   0.3781    
## paN_res - maN_other == 0   -5.693      0.546  -10.43   <0.001 ***
## paN_nom - maN_RED == 0     -1.050      0.335   -3.13   0.0205 *  
## paN_RED - maN_RED == 0      1.557      0.414    3.77   0.0021 ** 
## paN_res - maN_RED == 0     -3.305      0.489   -6.76   <0.001 ***
## paN_RED - paN_nom == 0      2.607      0.404    6.45   <0.001 ***
## paN_res - paN_nom == 0     -2.256      0.441   -5.11   <0.001 ***
## paN_res - paN_RED == 0     -4.862      0.549   -8.86   <0.001 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
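
To read the regression coefficients above on the probability scale (a quick illustration, not part of the original analysis), sum the relevant coefficients and apply the inverse logit with plogis(). For example, the predicted substitution rate for p (voiceless, labial) under maN- other, assuming, as the printed summary suggests, that the reference levels are maN- adv., voiced, and coronal:

#Illustration only: convert summed logit coefficients to a predicted probability
#(assumes reference levels maN- adv., voiced, and coronal, as the summary above suggests)
plogis(sum(coef(tag.glm)[c("(Intercept)", "morphmaN_other", "voicevoiceless", "placelab")]))
#roughly 1/(1 + exp(-(-1.962 + 2.076 + 5.358 + 2.380))), i.e., close to 1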

Decision-tree model

One of the models to fit to the data is a decision-tree (multiplicative) model: give each prefix a probability of creating a configuration compatible with nasal substitution, and give each consonant a probability of undergoing nasal substitution if it is in the right configuration. The probability of a C+morph combination displaying nasal substitution is then the product of those two probabilities (hence ‘multiplicative’). The problem is that a model like this can only create a pinch at one end, resulting in a “claw” shape rather than a “wug” shape (Dustin Bowers’s term).
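
To see the problem concretely, here is a toy illustration with made-up probabilities (not the fitted ones): because every cell is a prefix probability times a consonant probability, each prefix’s curve is a scaled copy of the same consonant profile, so the curves pinch together only at the low end and fan apart at the high end.

#Toy illustration of the "claw" shape (made-up probabilities, not fitted values)
prefix_prob <- c(1.0, 0.8, 0.4, 0.1)       #hypothetical per-prefix probabilities
consonant_prob <- c(0.95, 0.7, 0.3, 0.05)  #hypothetical per-consonant probabilities
toy <- outer(prefix_prob, consonant_prob)  #each cell is the product of the two
round(toy, 2) #every row is proportional to every other row
matplot(t(toy), type="b", ylim=c(0,1), xlab="consonant (toy)", ylab="predicted rate (toy)")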

Fit the optimal probabilities:

set.seed(1234) #just in case we do anything with random numbers
myMultiplicative <- function(x) {   #define the error function that is to be minimized
    
    #log likelihood
    log_likelihood <- 0
    for (i in 1:6) { #loop over prefixes
      for(j in 1:6) { #loop over Cs
        #increment log likelihood by (token-weighted) number of substituted items times the log of their probability, plus (token-weighted) number of unsubstituted items times the log of their probability
        log_likelihood <- log_likelihood + nas_sub[i+6*(j-1),4]*log(x[i]*x[6+j]) + nas_sub[i+6*(j-1),5]*log(1-(x[i]*x[6+j])) #This is rather fiddly. The idea is that x[1:6] are the probabilities for the prefix, and x[7:12] are the probabilities for the Cs groups. We're using the loops to step through the rows of the nas_sub data-frame
      }
    }
    return(-1*log_likelihood) #by default optim() minimizes, so we minimize the negative log likelihood (that is, get log likelihood as close to zero as possible)
}

#run the optimizer
myOptimization <- optim(par=c(0.5,0.5,0.5,0.5,0.5,0.5, 0.5,0.5,0.5,0.5,0.5,0.5), fn=myMultiplicative, lower=0.000001, upper=1-0.000001, method="L-BFGS-B") #we have to keep the parameters away from actual 0 and 1 or else undefined log()s will result and an error is thrown

#view the parameters
myOptimization$par
##  [1] 1.00000 1.00000 0.94680 1.00000 0.65385 0.26828 1.00000 1.00000
##  [9] 0.98453 0.74334 0.21067 0.01372
#get the log likelihood
-1*myOptimization$value
## [1] -292.3

Add predicted probabilities as a new column to data-frame:

for (i in 1:6) { #loop over prefixes
  for(j in 1:6) { #loop over Cs
    nas_sub$multiplicative_prediction[i+6*(j-1)] <- myOptimization$par[i] * myOptimization$par[6+j]
  }
}

Plot it:

par(mar=c(5,5,2,6))
suppressWarnings(with(nas_sub, {
  interaction.plot(x.factor=consonant, 
  trace.factor=morph_unicode, 
  response=multiplicative_prediction,
  xlab="stem-initial consonant",ylab="nas. sub. rate predicted by multiplicative model"
  , trace.label="prefix construction", fixed=TRUE, type="b", pch=c(0,1,2,20,18,15))
}))

plot of chunk plotDecisionTreeModelTagalog

#Since this will appear in the paper, also do a PNG file:
png(file="Tagalog_decisionTree_model.png",width=myResMultiplier*460,height=myResMultiplier*300,res=myResMultiplier*72,family=myFontFamily)
par(mar=c(5,4,2,0)+0.1)
with(nas_sub, {
  interaction.plot(x.factor=consonant, 
  trace.factor=morph_unicode, 
  response=multiplicative_prediction,
  xlab="stem-initial consonant",ylab="nas. sub. rate predicted by multiplicative model"
  , trace.label="prefix construction", fixed=TRUE, type="b", pch=c(0,1,2,20,18,15))
})
dev.off()
## pdf 
##   2

Making OTSoft input file

To aid reproducibility and record-keeping, we generate an OTSoft input file automatically from this script.
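
For orientation, each row of the data frame becomes a two-row tableau in that file: the input, a candidate name, its count, and then eleven tab-separated violation marks, with the input left blank on the second row. A sketch of what one such tableau looks like (the counts here are made up; the real ones come from nas_sub):

#Sketch of one generated tableau (made-up counts; real ones come from nas_sub)
cat(paste("p maN- other", "substituted",   10, paste(c(0,1,0,0,0,1,0,0,0,0,0), collapse="\t"), sep="\t"),
    paste("",             "unsubstituted",  2, paste(c(1,0,0,0,1,0,0,0,0,0,0), collapse="\t"), sep="\t"),
    sep="\n")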

Print first two rows (constraint names):

write(c("\t\t\t*NC\t*[m/n/ng\t*[n/ng\t*[ng\tNasSub\tFaith-maN- other\tFaith-paN-RED-\tFaith-maN-RED-\tFaith-maN- adv\tFaith-paN- noun\tFaith-paN- res","\t\t\t*NC\t*[m/n/ng\t*[n/ng\t*[ng\tNasSub\tFaith-maN- other\tFaith-paN-RED-\tFaith-maN-RED-\tFaith-maN- adv\tFaith-paN- noun\tFaith-paN- res"), file="Tagalog_for_OTSoft.txt",append=FALSE) #overwrite anything there already

Loop through data frame, turning each row into a two-row tableau:

for(i in 1:length(nas_sub$consonant)) {
  #make the input
  myInput <- paste(nas_sub$consonant[i], nas_sub$morph[i])
  
  #make the violation vectors for the two candidates
  mySubst_violations <- rep(0,11) #initialize to all zeros
  myUnsubst_violations <- rep(0,11) #initialize to all zeros
  if(nas_sub$consonant[i] %in% c("p","t/s","k")) {
    myUnsubst_violations[1] <- 1 #*NC
  }
  if(nas_sub$consonant[i] %in% c("p","b")) {
    mySubst_violations[2] <- 1 #*[m/n/ng
  } else if(nas_sub$consonant[i] %in% c("t/s", "d")) {
    mySubst_violations[2:3] <- 1 #*[m/n/ng, *[m/ng
  } else if(nas_sub$consonant[i] %in% c("k", "g")) {
    mySubst_violations[2:4] <- 1 #*[m/n/ng, *[m/ng, *[ng
  }
  myUnsubst_violations[5] <- 1 #NasSub
  if(nas_sub$morph[i] == "maN- other") {
    mySubst_violations[6] <- 1
  } else if(nas_sub$morph[i] == "paN-RED-") {
    mySubst_violations[7] <- 1
  } else if(nas_sub$morph[i] == "maN-RED-") {
    mySubst_violations[8] <- 1
  } else if(nas_sub$morph[i] == "maN- adv.") {
    mySubst_violations[9] <- 1
  }else if(nas_sub$morph[i] == "paN- noun") {
    mySubst_violations[10] <- 1
  }else if(nas_sub$morph[i] == "paN- res.") {
    mySubst_violations[11] <- 1
  } 
  mySubst_violations <- paste(as.character(mySubst_violations),collapse="\t")
  myUnsubst_violations <- paste(as.character(myUnsubst_violations),collapse="\t")
  
  #print the two rows
  write(c(paste(myInput,"substituted",nas_sub$subst_count[i],mySubst_violations,sep="\t"),paste("","unsubstituted",nas_sub$unsubst_count[i],myUnsubst_violations,sep="\t")),file="Tagalog_for_OTSoft.txt",append=TRUE)
}

#While the above produces an output file that *looks* fine, and that the MaxEnt Grammar Tool has no problem with, OTSoft ignores its last line for some reason. This seems to fix the problem:
write(c("\t\t\t\t\t\t\t\t\t"),file="Tagalog_for_OTSoft.txt",append=TRUE)
##BUT! The line with the tabs has to be removed in order to run Bruce's multifold GLA program, or else it acts as an additional candidate (with no constraint violations, so always wins)
#Hm, sometimes it also has to be removed for MaxEnt grammar tool
#Try this instead:
#write(c("   "),file="Tagalog_for_OTSoft.txt",append=TRUE)

Constraint models

Outside of this script, the file made by the above code, Tagalog_for_OTSoft.txt, is then used as input for a few constraint models.

MaxEnt

The MaxEnt Grammar Tool (http://www.linguistics.ucla.edu/people/hayes/MaxentGrammarTool/) is used, with default settings. In particular, we use the default values of mu and sigma for every constraint (mu=0, sigma=10000). The large sigma means that, in effect, there is no regularization/smoothing/prior, just as close a fit to the data as possible. The output file is named Tagalog_for_OTSoft_MaxEnt_output.txt.
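
For reference, the probability that a MaxEnt grammar assigns to a candidate is proportional to exp(-harmony), where harmony is the weighted sum of that candidate’s constraint violations. A minimal sketch of the computation for a two-candidate tableau, with made-up weights and violations (not the fitted grammar):

#Minimal sketch of the MaxEnt candidate probability (made-up numbers, not the fitted grammar)
maxent_prob <- function(weights, viol1, viol2) {
  h1 <- sum(weights * viol1) #harmony of candidate 1
  h2 <- sum(weights * viol2) #harmony of candidate 2
  exp(-h1) / (exp(-h1) + exp(-h2))
}
maxent_prob(weights=c(2,3), viol1=c(1,0), viol2=c(0,1)) #= 1/(1+exp(-1)), about 0.73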

Read in the MaxEnt output file:

conn <- file("Tagalog_for_OTSoft_MaxEnt_output.txt",open="r")
maxEntLines <- readLines(conn)
close(conn)

Find and parse the lines at end that give probabilities for candidates, adding results to main data frame:

#Define function to find the header row for this part
getHeaderRowIndex <- function() {
  for(i in 1:length(maxEntLines)) {
    if(maxEntLines[i]=="Input:\tCandidate:\tObserved:\tPredicted:") {
      return(i)
    }
  }
}

#Use the function to see where to start for loop
headerRowIndex <- getHeaderRowIndex()

for(i in (headerRowIndex+1):length(maxEntLines)){ #parentheses needed: ":" binds more tightly than "+"
  #split into columns
  lineParts <- strsplit(maxEntLines[i],split="\t")
  
  #Don't bother proceeding further if it's an "unsubstituted" candidate line
  if(length(lineParts[[1]]) == 4 & lineParts[[1]][2]=="substituted") {
    
    #extract consonant, morphology, and probability of substituted candidate
    myC <- strsplit(lineParts[[1]][1],split=" ",fixed=TRUE)[[1]][1]
    myMorph <- str_sub(lineParts[[1]][1],str_length(myC)+2)
    mySubst_probability <- as.numeric(lineParts[[1]][4])
    
    #add to main data frame
    nas_sub$maxentPredictions[nas_sub$consonant==myC & nas_sub$morph==myMorph] <- mySubst_probability
  }
}

Plot it:

par(mar=c(5,5,2,6))
suppressWarnings(with(nas_sub, {
  interaction.plot(x.factor=consonant, 
  trace.factor=morph_unicode, 
  response=maxentPredictions,
  xlab="stem-initial consonant",ylab="nas. sub. rate predicted by MaxEnt model"
  , trace.label="prefix construction", fixed=TRUE, type="b", pch=c(0,1,2,20,18,15))
}))

plot of chunk plotMaxEntModelTagalog

#Since this will appear in the paper, also do a PNG file:
png(file="Tagalog_MaxEnt_model.png",width=myResMultiplier*460,height=myResMultiplier*300,res=myResMultiplier*72,family=myFontFamily)
par(mar=c(5,4,2,0)+0.1)
with(nas_sub, {
  interaction.plot(x.factor=consonant, 
  trace.factor=morph_unicode, 
  response=maxentPredictions,
  xlab="stem-initial consonant",ylab="nas. sub. rate predicted by MaxEnt model"
  , trace.label="prefix construction", fixed=TRUE, type="b", pch=c(0,1,2,20,18,15))
})
dev.off()
## pdf 
##   2

Get a table of constraint names and weights:

#Initialize data frame
maxEntGrammar <- data.frame(constraintName=character(0), constraintWeight = numeric(0))

#Define function to find the header row for this part
getConstraintHeaderRowIndex <- function() {
  for(i in 1:length(maxEntLines)) {
    if(maxEntLines[i]=="|weights| after optimization:") {
      return(i)
    }
  }
}

#Use the function to see where to start for loop
constraintHeaderRowIndex <- getConstraintHeaderRowIndex()

#Extract names and weights and add to data frame
for(i in (constraintHeaderRowIndex+1):(headerRowIndex-1)){
  lineParts <- strsplit(maxEntLines[i],split="\t")
  myConstraintName <- strsplit(lineParts[[1]][1],split="\\(")[[1]][1] #split at opening ( of mu/sigma note
  myConstraintName <- substr(myConstraintName,start=1,stop=nchar(myConstraintName)-1) #trim final space off of constraint name
  myConstraintWeight <- lineParts[[1]][2]

  maxEntGrammar <- rbind(maxEntGrammar,data.frame(constraintName=myConstraintName, constraintWeight=myConstraintWeight))
  
}

#Print to console, for pasting into Word document
maxEntGrammar
##      constraintName   constraintWeight
## 1               *NC  4.850637335679573
## 2          *[m/n/ng                0.0
## 3            *[n/ng 2.1263895994464734
## 4              *[ng 1.1565693854918335
## 5            NasSub  2.310635996134479
## 6  Faith-maN- other                0.0
## 7    Faith-paN-RED- 0.8154354862375875
## 8    Faith-maN-RED-   2.28788653243942
## 9    Faith-maN- adv 1.9200481316624465
## 10  Faith-paN- noun  4.063326210375418
## 11   Faith-paN- res  6.008815758248136
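
As a sanity check on the parsing (not part of the original pipeline), we can recompute one cell’s prediction directly from this table, using the violation profiles assumed when the OTSoft file was written above: for p with maN- other, the substituted candidate violates *[m/n/ng and Faith-maN- other, and the unsubstituted candidate violates *NC and NasSub.

#Sanity check (not in the original pipeline): recompute the prediction for p + maN- other from the weights
w <- setNames(as.numeric(as.character(maxEntGrammar$constraintWeight)), as.character(maxEntGrammar$constraintName))
h_subst   <- w["*[m/n/ng"] + w["Faith-maN- other"] #harmony of the substituted candidate
h_unsubst <- w["*NC"] + w["NasSub"]                #harmony of the unsubstituted candidate
exp(-h_subst) / (exp(-h_subst) + exp(-h_unsubst))  #should match (up to rounding) nas_sub$maxentPredictions for this cell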

Explicatory sigmoid plots

When the article discusses the MaxEnt models, it includes some plots to help explain the sigmoid shape of the MaxEnt grammar’s predictions: in a two-candidate tableau the probability of substitution is 1/(1 + exp(H(substituted) - H(unsubstituted))), and the harmony difference decomposes into the UNIFORMITY (Faith) weight for the prefix plus a markedness difference that depends only on the consonant, so holding the prefix fixed traces out a logistic curve over the consonants’ markedness differences.

maN- other vs. paN- reservational, for each consonant

#get UNIF weights
maN_other_weight <- as.numeric(as.character(maxEntGrammar$constraintWeight[maxEntGrammar$constraintName=="Faith-maN- other"]))
paN_res_weight <- as.numeric(as.character(maxEntGrammar$constraintWeight[maxEntGrammar$constraintName=="Faith-paN- res"]))

#plot two curves and a legend
curve(1/(1+exp(x+maN_other_weight)), -10, 10, lty=1, xlab="markedness difference for C", ylab="prob. of substitution",ylim=c(0,1), cex.axis=0.9)
curve(1/(1+exp(x+paN_res_weight)), add=TRUE, lty=3)
suppressWarnings(legend(2, 0.9, c("ma\U014B- other","pa\U014B- reservational"), cex=0.8, lty=c(1,3)))

#get markedness differences for each C
markedness_differences <- data.frame(consonant=character(0), mark_diff = numeric(0))

for(myC in levels(nas_sub$consonant)) {
  #start with unsubst's penalty for NasSub
  myMarkednessDifference <- 0 - as.numeric(as.character(maxEntGrammar$constraintWeight[maxEntGrammar$constraintName=="NasSub"]))
  
  #subtract unsubst's penalty for *NC, if applicable
  if(myC %in% c("p","t/s","k")) {
    myMarkednessDifference <- myMarkednessDifference - as.numeric(as.character(maxEntGrammar$constraintWeight[maxEntGrammar$constraintName=="*NC"]))
  }
  
  #add subst's penalty for applicable *[nasal constraints
  myMarkednessDifference <- myMarkednessDifference + as.numeric(as.character(maxEntGrammar$constraintWeight[maxEntGrammar$constraintName=="*[m/n/ng"])) #all of them violate this one
  if(myC %in% c("t/s","k","d","g")) {
    myMarkednessDifference <- myMarkednessDifference + as.numeric(as.character(maxEntGrammar$constraintWeight[maxEntGrammar$constraintName=="*[n/ng"])) #just the coronals and velars
  }
  if(myC %in% c("k","g")) {
    myMarkednessDifference <- myMarkednessDifference + as.numeric(as.character(maxEntGrammar$constraintWeight[maxEntGrammar$constraintName=="*[ng"])) #just the velars
  }
  
  #Now draw a line there
  abline(v=myMarkednessDifference,col=3,lty=2)
  #And add a label
  mtext(myC,side=3, at=myMarkednessDifference, line=0.5)
  
  #Record the result in the data frame:
  markedness_differences <- rbind(markedness_differences,data.frame(consonant=myC, mark_diff=myMarkednessDifference))
}

plot of chunk plotSigmoids_maN_vs_paN

#Now the same thing, but written to a file for pasting into paper, and without saving the markedness differences
png(file="Tagalog_maN_vs_paN.png",width=myResMultiplier*460,height=myResMultiplier*225,res=myResMultiplier*72,family=myFontFamily)
par(mar=c(5,4,2,0)+0.1)
curve(1/(1+exp(x+maN_other_weight)), -10, 10, lty=1, xlab="markedness difference for C", ylab="prob. of substitution",ylim=c(0,1), cex.axis=0.9)
curve(1/(1+exp(x+paN_res_weight)), add=TRUE, lty=3)
legend(2, 0.9, c("ma\U014B- other","pa\U014B- reservational"), cex=0.8, lty=c(1,3))

for(myC in levels(nas_sub$consonant)) {
  myMarkednessDifference <- 0 - as.numeric(as.character(maxEntGrammar$constraintWeight[maxEntGrammar$constraintName=="NasSub"]))
  if(myC %in% c("p","t/s","k")) {
    myMarkednessDifference <- myMarkednessDifference - as.numeric(as.character(maxEntGrammar$constraintWeight[maxEntGrammar$constraintName=="*NC"]))
  }
  myMarkednessDifference <- myMarkednessDifference + as.numeric(as.character(maxEntGrammar$constraintWeight[maxEntGrammar$constraintName=="*[m/n/ng"])) 
  if(myC %in% c("t/s","k","d","g")) {
    myMarkednessDifference <- myMarkednessDifference + as.numeric(as.character(maxEntGrammar$constraintWeight[maxEntGrammar$constraintName=="*[n/ng"]))
  }
  if(myC %in% c("k","g")) {
    myMarkednessDifference <- myMarkednessDifference + as.numeric(as.character(maxEntGrammar$constraintWeight[maxEntGrammar$constraintName=="*[ng"])) #just the velars
  }
  abline(v=myMarkednessDifference,col=3,lty=2)
  mtext(myC,side=3, at=myMarkednessDifference, line=0.5)
}
dev.off()
## pdf 
##   2

Now the converse: p vs. k for different UNIFORMITY weights

#a little extra head room for labels
par(mar=c(5,4,5,2))

#plot two curves and a legend
curve(1/(1+exp(markedness_differences$mark_diff[markedness_differences$consonant=="p"] + x)), 0, 15, lty=1, xlab="weight(Uniformity) for each prefix", ylab="prob. of substitution",ylim=c(0,1), cex.axis=0.9)
curve(1/(1+exp(markedness_differences$mark_diff[markedness_differences$consonant=="k"] + x)), add=TRUE, lty=3)
legend(10, 1.0, c("p","k"), cex=0.8, lty=c(1,3))

for(myMorph in levels(nas_sub$morph)) { 
  #get the unicode version of the morpheme name
  myMorphUnicode <- nas_sub$morph_unicode[nas_sub$morph==myMorph][1]
  #form the corresponding constraint name
  myConstraintName <- paste("Faith-",myMorph,sep="")
  #remove final period if necessary
  if(substr(myConstraintName,start=nchar(myConstraintName),stop=nchar(myConstraintName))==".") {
    myConstraintName <- substr(myConstraintName,start=0,stop=nchar(myConstraintName)-1)
  }
  
  #get UNIF weight
  myUnifWeight <- as.numeric(as.character(maxEntGrammar$constraintWeight[maxEntGrammar$constraintName==myConstraintName]))
  #Now draw a line there
  abline(v=myUnifWeight,col=3,lty=2)
  #And add a label
  par(las=3) #vertical text
  suppressWarnings(mtext(myMorphUnicode, side=3, at=myUnifWeight, line=0.5))
  par(las=0) #return to default
}

plot of chunk plotSigmoids_p_vs_k

#Now the same thing, but written to a file for pasting into paper
png(file="Tagalog_p_vs_k.png",width=myResMultiplier*460,height=myResMultiplier*270, res=myResMultiplier*72, family=myFontFamily)
par(mar=c(5,4,5,0)+0.1)
curve(1/(1+exp(markedness_differences$mark_diff[markedness_differences$consonant=="p"] + x)), 0, 12, lty=1, xlab="weight(Uniformity) for each prefix", ylab="prob. of substitution",ylim=c(0,1), cex.axis=0.9)
curve(1/(1+exp(markedness_differences$mark_diff[markedness_differences$consonant=="k"] + x)), add=TRUE, lty=3)
legend(10, 1.0, c("p","k"), cex=0.8, lty=c(1,3))

for(myMorph in levels(nas_sub$morph)) { 
  myMorphUnicode <- nas_sub$morph_unicode[nas_sub$morph==myMorph][1]
  myConstraintName <- paste("Faith-",myMorph,sep="")
  if(substr(myConstraintName,start=nchar(myConstraintName),stop=nchar(myConstraintName))==".") {
    myConstraintName <- substr(myConstraintName,start=0,stop=nchar(myConstraintName)-1)
  }
  myUnifWeight <- as.numeric(as.character(maxEntGrammar$constraintWeight[maxEntGrammar$constraintName==myConstraintName]))
  abline(v=myUnifWeight,col=3,lty=2)
  par(las=3)
  suppressWarnings(mtext(myMorphUnicode,side=3, at=myUnifWeight, line=0.5))
  par(las=0)
}
dev.off()
## pdf 
##   2

Same thing except b vs. g, to show a different end of the curve. This one is not used in the paper, so the plot is not as polished:

#a little extra head room for labels
par(mar=c(5,4,5,2))

#plot two curves and a legend
curve(1/(1+exp(markedness_differences$mark_diff[markedness_differences$consonant=="b"] + x)), 0, 15, lty=1, xlab="weight(Uniformity) for each prefix", ylab="prob. of substitution",ylim=c(0,1), cex.axis=0.9)
curve(1/(1+exp(markedness_differences$mark_diff[markedness_differences$consonant=="g"] + x)), add=TRUE, lty=3)
legend(12, 1.0, c("b","g"), cex=0.8, lty=c(1,3))

for(myMorph in levels(nas_sub$morph)) { 
  #form the corresponding constraint name
  myConstraintName <- paste("Faith-",myMorph,sep="")
  #remove final period if necessary
  if(substr(myConstraintName,start=nchar(myConstraintName),stop=nchar(myConstraintName))==".") {
    myConstraintName <- substr(myConstraintName,start=0,stop=nchar(myConstraintName)-1)
  }
  
  #get UNIF weight
  myUnifWeight <- as.numeric(as.character(maxEntGrammar$constraintWeight[maxEntGrammar$constraintName==myConstraintName]))
  #Now draw a line there
  abline(v=myUnifWeight,col=3,lty=2)
  #And add a label
  par(las=3) #vertical text
  mtext(myMorph,side=3, at=myUnifWeight)
  par(las=0) #return to default
}

plot of chunk plotSigmoids_b_vs_g

#Also write it to a file, though it is not currently used in the paper
png(file="Tagalog_b_vs_g.png",width=460,height=225)
par(mar=c(5,4,5,1))
curve(1/(1+exp(markedness_differences$mark_diff[markedness_differences$consonant=="b"] + x)), 0, 15, lty=1, xlab="weight(Uniformity) for each prefix", ylab="prob. of substitution",ylim=c(0,1), cex.axis=0.9)
curve(1/(1+exp(markedness_differences$mark_diff[markedness_differences$consonant=="g"] + x)), add=TRUE, lty=3)
legend(12, 1.0, c("b","g"), cex=0.8, lty=c(1,3))

for(myMorph in levels(nas_sub$morph)) { 
  myConstraintName <- paste("Faith-",myMorph,sep="")
  if(substr(myConstraintName,start=nchar(myConstraintName),stop=nchar(myConstraintName))==".") {
    myConstraintName <- substr(myConstraintName,start=0,stop=nchar(myConstraintName)-1)
  }
  myUnifWeight <- as.numeric(as.character(maxEntGrammar$constraintWeight[maxEntGrammar$constraintName==myConstraintName]))
  abline(v=myUnifWeight,col=3,lty=2)
  par(las=3)
  mtext(myMorph,side=3, at=myUnifWeight)
  par(las=0)
}
dev.off()
## pdf 
##   2

Noisy Harmonic Grammar

We use OTSoft (http://www.linguistics.ucla.edu/people/hayes/otsoft/) to fit a Noisy Harmonic Grammar model using the Gradual Learning Algorithm. The settings were as follows (defaults except where noted):

  • number of times to run learning: 10,000,000
  • apply noise by tableau cell not constraint? NO
  • apply noise after multiplication of weights by violation? NO
  • Allow constraint weights to go negative? NO
  • initial plasticity: 0.01 (used to be 2, but we reduced it to get smaller weights)
  • final plasticity: 0.001
  • number of times to test grammar: 1,000,000
  • default initial rankings (all same)

The only other non-default option is that in the main screen, Options > Sort candidates in tableaux by harmony is turned off, to make dealing with the OTSoft output, below, easier.
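
For intuition about what such a grammar predicts, here is a sketch of a single noisy evaluation (illustrative, not OTSoft's implementation; it assumes, as the later pnorm() code does, independent Gaussian noise with standard deviation 2 on each weight): each candidate's penalty is the noisy-weighted sum of its violations, the candidate with the lower penalty wins, and the predicted rate is the long-run proportion of wins.

#Sketch of a noisy-HG evaluation (illustrative; not OTSoft's code). Assumes sd-2 Gaussian noise per weight.
noisyHG_winner <- function(weights, viol_subst, viol_unsubst, noise_sd=2) {
  noisy_w <- weights + rnorm(length(weights), sd=noise_sd)
  if(sum(noisy_w * viol_subst) < sum(noisy_w * viol_unsubst)) "substituted" else "unsubstituted"
}
#made-up three-constraint example: estimate the substitution rate over many evaluations
set.seed(99)
mean(replicate(10000, noisyHG_winner(c(6,4,1), c(1,0,0), c(0,1,1))) == "substituted") #about 0.39 with these made-up numbers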

Read in the Noisy HG output file:

conn <- file("Tagalog_for_OTSoft_NHGTabbedOutput.txt",open="r")
NHGLines <- readLines(conn)
close(conn)

Find and parse the lines at the end that give probabilities for candidates; we need to find the third line that starts with " 1 ":

#Define function to find the header row for this part
howManyTimes1Seen <- 0 #initialize counter
getNHGStartingRowIndex <- function() {
  for(i in 1:length(NHGLines)) {
    if(str_sub(NHGLines[i],1,4)==" 1 \t") {
      howManyTimes1Seen <- howManyTimes1Seen + 1
      if(howManyTimes1Seen==3) {
        return(i) #once we hit the third one, return the index
      }
    }
  }
}

#Use the function to see where to start for loop
startingRowIndex <- getNHGStartingRowIndex()

for(i in startingRowIndex:length(NHGLines)){
  #split into columns
  lineParts <- strsplit(NHGLines[i],split="\t")
    
  #extract consonant, morphology, and probability of substituted candidate
  myC <- strsplit(lineParts[[1]][2],split=" ",fixed=TRUE)[[1]][1]
  myMorph <- str_sub(lineParts[[1]][2],str_length(myC)+2)
  
  #different lines have candidates in different orders
  if(lineParts[[1]][4]=="unsubstituted") {
    mySubst_probability <- 1 - as.numeric(lineParts[[1]][7])/1000000
  }
  else if(lineParts[[1]][4]=="substituted") {
    mySubst_probability <- as.numeric(lineParts[[1]][7])/1000000
  }
    
  #add to main data frame
    nas_sub$NHG_Predictions[nas_sub$consonant==myC & nas_sub$morph==myMorph] <- mySubst_probability

}

Plot it:

par(mar=c(5,5,2,6))
suppressWarnings(with(nas_sub, {
  interaction.plot(x.factor=consonant, 
  trace.factor=morph_unicode, 
  response=NHG_Predictions,
  xlab="stem-initial consonant",ylab="nas. sub. rate predicted by NHG model"
  , trace.label="prefix construction", fixed=TRUE, type="b", pch=c(0,1,2,20,18,15))
}))

plot of chunk plotNHGModelTagalog

#Since this will appear in the paper, also do a PNG file:
png(file="Tagalog_NHG_model.png",width=myResMultiplier*460,height=myResMultiplier*300,res=myResMultiplier*72,family=myFontFamily)
par(mar=c(5,4,2,0)+0.1)
with(nas_sub, {
  interaction.plot(x.factor=consonant, 
  trace.factor=morph_unicode, 
  response=NHG_Predictions,
  xlab="stem-initial consonant",ylab="nas. sub. rate predicted by NHG model"
  , trace.label="prefix construction", fixed=TRUE, type="b", pch=c(0,1,2,20,18,15))
})
dev.off()
## pdf 
##   2

Get a table of constraint names and weights:

#Initialize data frame
NHGGrammar <- data.frame(constraintName=character(0), constraintWeight = numeric(0))

#Define function to find the header row for this part
getNHGConstraintEndingRowIndex <- function() {
  for(i in 1:length(NHGLines)) {
    if(str_sub(NHGLines[i],1,4)==" 1 \t") { #find first instance of a row that starts with "1" (rather than a constraint name)
      return(i)
    }
  }
}

#Use the function to see where to start for loop
constraintEndingRowIndex <- getNHGConstraintEndingRowIndex()

#Extract names and weights and add to data frame
for(i in 1:(constraintEndingRowIndex-1)){
  lineParts <- strsplit(NHGLines[i],split="\t")
  myConstraintName <- lineParts[[1]][1]
  myConstraintWeight <- as.numeric(lineParts[[1]][2])

  NHGGrammar <- rbind(NHGGrammar,data.frame(constraintName=myConstraintName, constraintWeight=myConstraintWeight)) 
}

#Print to console, for pasting into Word document
NHGGrammar
##      constraintName constraintWeight
## 1               *NC          10.6079
## 2          *[m/n/ng           0.4193
## 3            *[n/ng           4.3617
## 4              *[ng           2.1659
## 5            NasSub           6.1707
## 6  Faith-maN- other           0.3689
## 7    Faith-paN-RED-           2.1161
## 8    Faith-maN-RED-           5.0054
## 9    Faith-maN- adv           4.3075
## 10  Faith-paN- noun           8.9390
## 11   Faith-paN- res          13.6123

As with MaxEnt, the text of the paper includes an explanation of why NHG creates sigmoids. This plot accompanies that discussion. It is a plot of substitution probability as a function of UNIFORMITY weight, for two different consonants, p and k.
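
The pnorm() expressions in the next chunk rest on the following reasoning, a sketch under the assumption (implicit in the sd=sqrt(4*4) and sd=sqrt(6*4) arguments) that each constraint weight receives independent Gaussian noise with standard deviation 2: the substituted candidate wins whenever its noisy penalty is lower, and the difference between the two noisy penalties is Gaussian with variance 4 times the number of constraints involved. A quick simulation check for p with one made-up UNIF weight:

#Simulation check of the pnorm() shortcut used below (a sketch, not the GLA itself)
getW <- function(name) as.numeric(as.character(NHGGrammar$constraintWeight[NHGGrammar$constraintName==name]))
set.seed(321)
unif_w <- 5 #made-up UNIF weight, just for the check
noise <- matrix(rnorm(4*100000, sd=2), ncol=4) #4 constraints are relevant for p
h_subst   <- (getW("*[m/n/ng") + noise[,1]) + (unif_w + noise[,2])    #substituted candidate's penalty
h_unsubst <- (getW("*NC") + noise[,3]) + (getW("NasSub") + noise[,4]) #unsubstituted candidate's penalty
mean(h_subst < h_unsubst) #simulated probability of substitution
pnorm(getW("*NC") + getW("NasSub") - getW("*[m/n/ng") - unif_w, sd=sqrt(4*4)) #closed form, as used below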

NC_weight <- as.numeric(as.character(NHGGrammar$constraintWeight[NHGGrammar$constraintName=="*NC"]))
NasSub_weight <- as.numeric(as.character(NHGGrammar$constraintWeight[NHGGrammar$constraintName=="NasSub"]))
mnng_weight <- as.numeric(as.character(NHGGrammar$constraintWeight[NHGGrammar$constraintName=="*[m/n/ng"]))
nng_weight <- as.numeric(as.character(NHGGrammar$constraintWeight[NHGGrammar$constraintName=="*[n/ng"]))
ng_weight <- as.numeric(as.character(NHGGrammar$constraintWeight[NHGGrammar$constraintName=="*[ng"]))

par(mar=c(5,4,5,2)) #extra room at top for morphology labels
#plot two curves
curve(pnorm(NC_weight+NasSub_weight-mnng_weight-x, sd = sqrt(4*4)), 0, 30, lty=1, xlab="UNIF weight", ylab="prob. of substitution",ylim=c(0,1), cex.axis=0.9)
curve(pnorm(NC_weight+NasSub_weight-mnng_weight-nng_weight-ng_weight-x, sd = sqrt(6*4)), add=TRUE, lty=3)
legend("topright", c("p","k"), cex=0.8, lty=c(1,3))

#add the lines for the UNIF weights
for(myMorph in levels(nas_sub$morph)) { 
  #get the unicode version of the morpheme name
  myMorphUnicode <- nas_sub$morph_unicode[nas_sub$morph == myMorph][1]
  #form the corresponding constraint name
  myConstraintName <- paste("Faith-",myMorph,sep="")
  #remove final period if necessary
  if(substr(myConstraintName,start=nchar(myConstraintName),stop=nchar(myConstraintName))==".") {
    myConstraintName <- substr(myConstraintName,start=0,stop=nchar(myConstraintName)-1)
  }
  
  #get UNIF weight
  myUnifWeight <- as.numeric(as.character(NHGGrammar$constraintWeight[NHGGrammar$constraintName==myConstraintName]))
  #Now draw a line there
  abline(v=myUnifWeight,col=3,lty=2)
  #And add a label
  par(las=3) #vertical text
  suppressWarnings(mtext(myMorphUnicode,side=3, at=myUnifWeight, line=0.5))
  par(las=0) #return to default
}

plot of chunk plotNHGSigmoids

#Now the same thing, but written to a file for pasting into paper
png(file="Tagalog_p_vs_k_NHG.png",width=myResMultiplier*460,height=myResMultiplier*270,res=myResMultiplier*72,family=myFontFamily)
par(mar=c(5,4,5,0)+0.1)
curve(pnorm(NC_weight+NasSub_weight-mnng_weight-x, sd = sqrt(4*4)), 0, 25, lty=1, xlab="UNIF weight", ylab="prob. of substitution",ylim=c(0,1), cex.axis=0.9)
curve(pnorm(NC_weight+NasSub_weight-mnng_weight-nng_weight-ng_weight-x, sd = sqrt(6*4)), add=TRUE, lty=3)
legend(20, 1.0, c("p","k"), cex=0.8, lty=c(1,3))

for(myMorph in levels(nas_sub$morph)) { 
  myMorphUnicode <- nas_sub$morph_unicode[nas_sub$morph == myMorph][1]
  myConstraintName <- paste("Faith-",myMorph,sep="")
  if(substr(myConstraintName,start=nchar(myConstraintName),stop=nchar(myConstraintName))==".") {
    myConstraintName <- substr(myConstraintName,start=0,stop=nchar(myConstraintName)-1)
  }
  
  myUnifWeight <- as.numeric(as.character(NHGGrammar$constraintWeight[NHGGrammar$constraintName==myConstraintName]))
  abline(v=myUnifWeight,col=3,lty=2)
  par(las=3)
  suppressWarnings(mtext(myMorphUnicode,side=3, at=myUnifWeight, line=0.5))
  par(las=0) 
}
dev.off()
## pdf 
##   2

Stochastic OT and Stratal OT with multiple runs

The GLA is not deterministic, and results can vary substantially from one run to the next. We used a (non-distributed) version of OTSoft that can run the GLA 10, 100, or 1000 times on the same input file, collating the results in a single file. We can then choose the best-fit model from this set.

We did this three times: twice in an attempt to find the best StOT model, and once to find the best stratal Anttilan model. In each case, the learner was run 1000 times (though, as we’ll see below, only the first 999 runs are examined):

  • NoMagri:
  • number of times to go through data: 1000000
  • starting plasticity: 2
  • ending plasticity: 0.001
  • number of times to test grammar: 100000
  • use Magri update rule: no

  • WithMagri:
  • number of times to go through data: 1000000
  • starting plasticity: 2
  • ending plasticity: 0.001
  • number of times to test grammar: 100000
  • use Magri update rule: yes

  • Stratal:
  • number of times to go through data: 1000000
  • starting plasticity: 20
  • ending plasticity: 20
  • number of times to test grammar: 100000
  • use Magri update rule: no

To examine the results, we read in the collated files:

conn <- file("CollateRuns_Tagalog_NoMagri.txt",open="r")
collated_NoMagri <- readLines(conn)
close(conn)

conn <- file("CollateRuns_Tagalog_WithMagri.txt",open="r")
collated_WithMagri <- readLines(conn)
close(conn)

conn <- file("CollateRuns_Tagalog_Stratal.txt",open="r")
collated_Stratal <- readLines(conn)
close(conn)

A function to read in all 1000 runs from a collated file and find the one with the best log likelihood. (The function is written so that the very last grammar gets ignored; this is equivalent to doing 999 runs rather than 1000. It would have required a bunch of extra programming to get the last one too.)

find_best_grammar <- function(collatedFile, backoff=1/100000) {
  #initialize
  best_log_like <- -Inf #the log likelihood to beat; starts out infinitely bad
  current_log_like <- 0
  
  best_grammar <- data.frame(constraintName=character(0), rankingValue=numeric(0))
  current_grammar <- data.frame(constraintName=character(0), rankingValue=numeric(0))
  
  best_probabilities <- data.frame(input=character(0), output=character(0), observedFreq=numeric(0), predictedProb=numeric(0))
  current_probabilities <- data.frame(input_C=character(0), input_morph=character(0), output=character(0), observedFreq=numeric(0), predictedProb=numeric(0))
  
  current_index <- 1
  
  #step through lines of collated file
  for(i in 1:length(collatedFile)) {
    #parse the line
    myLine <- strsplit(collatedFile[i], split="\t")
    
    #if index has gone up, then before going on to the next grammar, it's time to assess the one just finished
    if(as.numeric(myLine[[1]][2]) > current_index) {
      if(current_log_like > best_log_like) { #this grammar is the new winner
        best_log_like <- current_log_like
        best_grammar <- current_grammar
        best_probabilities <- current_probabilities
      }
      #either way [i.e., whether latest grammar was new winner or not], start the grammar, probabilities, and log likelihood fresh and update the index
      current_grammar <- data.frame(constraintName=character(0), rankingValue=numeric(0))
      current_probabilities <- data.frame(input_C=character(0), input_morph=character(0), output=character(0), observedFreq=numeric(0), predictedProb=numeric(0))
      current_log_like <- 0
      current_index <- as.numeric(myLine[[1]][2])
    }
    
    #process current line
    if(myLine[[1]][1] == "G") { #if starts with G, add to grammar
        current_grammar <- rbind(current_grammar, data.frame(constraintName=myLine[[1]][3], rankingValue=myLine[[1]][4]))  
    } else if(myLine[[1]][1] == "O" & (myLine[[1]][6]=="unsubstituted" | myLine[[1]][6]=="substituted")) { #if starts with O [that's a capital letter, not a zero], add to probabilities, and update log likelihood; don't bother if it's one of those weird lines at the end of each group with a nonexistent output
        #split out the consonant and the prefix construction
        myC <- strsplit(myLine[[1]][4],split=" ",fixed=TRUE)[[1]][1]
        myMorph <- str_sub(myLine[[1]][4],str_length(myC)+2)
      
      #add line to data frame
        current_probabilities <- rbind(current_probabilities, data.frame(input_C=myC, input_morph=myMorph, output=myLine[[1]][6], observedFreq=myLine[[1]][7], predictedProb=myLine[[1]][8]))
        myProb <- as.numeric(myLine[[1]][8])
        if(myProb == 0) {
          myProb <- backoff
        } else if (myProb==1) {
          myProb <- 1 - backoff
        }                
        current_log_like <- current_log_like +  as.numeric(myLine[[1]][7])*log(myProb)                              
    }
  }
  #return best grammar, probabilities, and log likelihood
  return(c(best_grammar,best_probabilities,best_log_like))
}

Find the best grammar from each group:

best_NoMagri <- find_best_grammar(collated_NoMagri) #-367.5775
best_WithMagri <- find_best_grammar(collated_WithMagri) #-314.6419
best_Stratal <- find_best_grammar(collated_Stratal) #-645.725

Print the grammars and their log likelihoods:

best_NoMagri_grammar <- data.frame(matrix(unlist(best_NoMagri[1:2]), nrow=11, byrow=F))
colnames(best_NoMagri_grammar) <- names(best_NoMagri)[1:2]
best_NoMagri_grammar
##      constraintName       rankingValue
## 1               *NC   185.34237510571 
## 2          *[m/n/ng   21.583468845981 
## 3            *[n/ng  180.719324679704 
## 4              *[ng  180.922040050001 
## 5            NasSub  178.416531154019 
## 6  Faith-maN- other -376.180006827324 
## 7    Faith-paN-RED-  172.665408206408 
## 8    Faith-maN-RED-  178.542294785941 
## 9    Faith-maN- adv  177.589151734176 
## 10  Faith-paN- noun  183.121009685449 
## 11   Faith-paN- res  185.845611261336
best_NoMagri[[8]][1] #log likelihood
## [1] -367.6
best_WithMagri_grammar <- data.frame(matrix(unlist(best_WithMagri[1:2]), nrow=11, byrow=F))
colnames(best_WithMagri_grammar) <- names(best_WithMagri)[1:2]
best_WithMagri_grammar
##      constraintName       rankingValue
## 1               *NC -304.152370006544 
## 2          *[m/n/ng -2355.49149032455 
## 3            *[n/ng -315.553354844323 
## 4              *[ng -308.219712133132 
## 5            NasSub -310.720345034178 
## 6  Faith-maN- other -314.592178303321 
## 7    Faith-paN-RED-   -312.7112155541 
## 8    Faith-maN-RED- -309.958550786064 
## 9    Faith-maN- adv -309.024574806601 
## 10  Faith-paN- noun -306.988839272625 
## 11   Faith-paN- res -302.216131600666
best_WithMagri[[8]][1] #log likelihood
## [1] -314.6
best_Stratal_grammar <- data.frame(matrix(unlist(best_Stratal[1:2]), nrow=11, byrow=F))
colnames(best_Stratal_grammar) <- names(best_Stratal)[1:2]
best_Stratal_grammar
##      constraintName rankingValue
## 1               *NC        2480 
## 2          *[m/n/ng       -2200 
## 3            *[n/ng        2400 
## 4              *[ng        2440 
## 5            NasSub        2400 
## 6  Faith-maN- other      -13400 
## 7    Faith-paN-RED-        1960 
## 8    Faith-maN-RED-        2400 
## 9    Faith-maN- adv        2380 
## 10  Faith-paN- noun        2480 
## 11   Faith-paN- res        2480
best_Stratal[[8]][1] #log likelihood
## [1] -645.7

Add the probabilities to the main data frame:

for(i in 1:length(nas_sub$consonant)) {
  nas_sub$best_NoMagri[i] <- as.numeric(as.character(best_NoMagri$predictedProb[best_NoMagri$input_C==nas_sub$consonant[i] & best_NoMagri$input_morph==nas_sub$morph[i] & best_NoMagri$output=="substituted"]))
}

for(i in 1:length(nas_sub$consonant)) {
  nas_sub$best_WithMagri[i] <- as.numeric(as.character(best_WithMagri$predictedProb[best_WithMagri$input_C==nas_sub$consonant[i] & best_WithMagri$input_morph==nas_sub$morph[i] & best_WithMagri$output=="substituted"]))
}

for(i in 1:length(nas_sub$consonant)) {
  nas_sub$best_Stratal[i] <- as.numeric(as.character(best_Stratal$predictedProb[best_Stratal$input_C==nas_sub$consonant[i] & best_Stratal$input_morph==nas_sub$morph[i] & best_Stratal$output=="substituted"]))
}

Interaction plots (only WithMagri, since it was better than NoMagri)

par(mar=c(5,5,2,6))
suppressWarnings(with(nas_sub, {
  interaction.plot(x.factor=consonant, 
  trace.factor=morph_unicode, 
  response=best_WithMagri,
  xlab="stem-initial consonant",ylab="nas. sub. rate, best StOT model"
  , trace.label="prefix construction", fixed=TRUE, type="b", pch=c(0,1,2,20,18,15))
}))

plot of chunk plotGLAResultsTagalog

#Since this will appear in the paper, also do a PNG file:
png(file="Tagalog_StOTNoMagri_model.png",width=myResMultiplier*460,height=myResMultiplier*300, res=myResMultiplier*72,family=myFontFamily)
par(mar=c(5,4,2,0)+0.1)
with(nas_sub, {
  interaction.plot(x.factor=consonant, 
  trace.factor=morph_unicode, 
  response=best_WithMagri,
  xlab="stem-initial consonant",ylab="nas. sub. rate, bestStOT model"
  , trace.label="prefix construction", fixed=TRUE, type="b", pch=c(0,1,2,20,18,15))
})
dev.off()
## pdf 
##   2
par(mar=c(5,5,2,6))
suppressWarnings(with(nas_sub, {
  interaction.plot(x.factor=consonant, 
  trace.factor=morph_unicode, 
  response=best_Stratal,
  xlab="stem-initial consonant",ylab="nas. sub. rate, best Stratal model"
  , trace.label="prefix construction", fixed=TRUE, type="b", pch=c(0,1,2,20,18,15))
}))

plot of chunk plotGLAResultsTagalog

#Since this will appear in the paper, also do a PNG file:
png(file="Tagalog_Stratal_model.png",width=myResMultiplier*460,height=myResMultiplier*300, res=myResMultiplier*72,family=myFontFamily)
par(mar=c(5,4,2,0)+0.1)
with(nas_sub, {
  interaction.plot(x.factor=consonant, 
  trace.factor=morph_unicode, 
  response=best_Stratal,
  xlab="stem-initial consonant",ylab="nas. sub. rate, best Stratal model"
  , trace.label="prefix construction", fixed=TRUE, type="b", pch=c(0,1,2,20,18,15))
})
dev.off()
## pdf 
##   2

StOT model with separate constraint for each consonant (“pointwise”)

Write an OTSoft input file with these constraints

  • *N+p: substitute [p] (i.e., don’t have nasal, morpheme boundary, p)
  • *N+t/s: substitute [t] and [s]
  • *N+k, *N+b, *N+d, *N+g: etc.

and the Unif constraints as before:

  • UNIF-paN- res, UNIF-paN- noun, UNIF-maN-RED-, UNIF-maN- adv, UNIF-paN-RED-, UNIF-maN- other

write(c("\t\t\t*N+p\t*N+t/s\t*N+k\t*N+b\t*N+d\t*N+g\tFaith-maN- other\tFaith-paN-RED-\tFaith-maN-RED-\tFaith-maN- adv\tFaith-paN- noun\tFaith-paN- res","\t\t\t*N+p\t*N+t/s\t*N+k\t*N+b\t*N+d\t*N+g\tFaith-maN- other\tFaith-paN-RED-\tFaith-maN-RED-\tFaith-maN- adv\tFaith-paN- noun\tFaith-paN- res"), file="Tagalog_for_OTSoft_Pointwise.txt",append=FALSE) #overwrite anything there already

#Loop through data frame, turning each row into a two-row tableau:

for(i in 1:length(nas_sub$consonant)) {
  #make the input
  myInput <- paste(nas_sub$consonant[i], nas_sub$morph[i])
  
  #make the violation vectors for the two candidates
  mySubst_violations <- rep(0,12) #initialize to all zeros
  myUnsubst_violations <- rep(0,12) #initialize to all zeros
  if(nas_sub$consonant[i] == "p") {
    myUnsubst_violations[1] <- 1 #*N+p
  } else if(nas_sub$consonant[i] == "t/s") {
    myUnsubst_violations[2] <- 1 #*N+t/s
  } else if(nas_sub$consonant[i] == "k") {
    myUnsubst_violations[3] <- 1 #*N+k
  } else if(nas_sub$consonant[i] == "b") {
    myUnsubst_violations[4] <- 1 #*N+b
  } else if(nas_sub$consonant[i] == "d") {
    myUnsubst_violations[5] <- 1 #*N+d
  } else if(nas_sub$consonant[i] == "g") {
    myUnsubst_violations[6] <- 1 #*N+g
  }
  
  if(nas_sub$morph[i] == "maN- other") {
    mySubst_violations[7] <- 1
  } else if(nas_sub$morph[i] == "paN-RED-") {
    mySubst_violations[8] <- 1
  } else if(nas_sub$morph[i] == "maN-RED-") {
    mySubst_violations[9] <- 1
  } else if(nas_sub$morph[i] == "maN- adv.") {
    mySubst_violations[10] <- 1
  }else if(nas_sub$morph[i] == "paN- noun") {
    mySubst_violations[11] <- 1
  }else if(nas_sub$morph[i] == "paN- res.") {
    mySubst_violations[12] <- 1
  } 
  mySubst_violations <- paste(as.character(mySubst_violations),collapse="\t")
  myUnsubst_violations <- paste(as.character(myUnsubst_violations),collapse="\t")
  
  #print the two rows
  write(c(paste(myInput,"substituted",nas_sub$subst_count[i],mySubst_violations,sep="\t"),paste("","unsubstituted",nas_sub$unsubst_count[i],myUnsubst_violations,sep="\t")),file="Tagalog_for_OTSoft_Pointwise.txt",append=TRUE)
}
#Because we're using the multifold GLA tool, I don't think we need the extra line here.

Go outside this script to run OTSoft, then copy the results folder (including CollateRuns.txt, which will be generated outside the folder and so must be moved) to the directory that this script is in. Get the best grammar, using the same functions defined above:

conn <- file("FilesForTagalog_for_OTSoft_Pointwise_NoMagri/CollateRuns.txt",open="r")
collated_NoMagri_pointwise <- readLines(conn)
close(conn)

conn <- file("FilesForTagalog_for_OTSoft_Pointwise_WithMagri/CollateRuns.txt",open="r")
collated_WithMagri_pointwise <- readLines(conn)
close(conn)

best_NoMagri_pointwise <- find_best_grammar(collated_NoMagri_pointwise) # -269.5477
best_WithMagri_pointwise <- find_best_grammar(collated_WithMagri_pointwise) #-301.2741

best_NoMagri_grammar_pointwise <- data.frame(matrix(unlist(best_NoMagri_pointwise[1:2]), nrow=12, byrow=F))
colnames(best_NoMagri_grammar_pointwise) <- names(best_NoMagri_pointwise)[1:2]
best_NoMagri_grammar_pointwise
##      constraintName       rankingValue
## 1              *N+p  105.414461213438 
## 2            *N+t/s  104.207601630031 
## 3              *N+k  103.090639156339 
## 4              *N+b  100.145366685766 
## 5              *N+d  96.1249529465515 
## 6              *N+g  91.0427383147581 
## 7  Faith-maN- other  95.8667245776422 
## 8    Faith-paN-RED-  97.1423608436616 
## 9    Faith-maN-RED-  99.9153473148658 
## 10   Faith-maN- adv  99.2422484730615 
## 11  Faith-paN- noun  102.516614475586 
## 12   Faith-paN- res  105.290944368299
best_NoMagri_pointwise[[8]][1] #log likelihood
## [1] -269.5
best_WithMagri_grammar_pointwise <- data.frame(matrix(unlist(best_WithMagri_pointwise[1:2]), nrow=12, byrow=F))
colnames(best_WithMagri_grammar_pointwise) <- names(best_WithMagri_pointwise)[1:2]
best_WithMagri_grammar_pointwise
##      constraintName       rankingValue
## 1              *N+p   -3835.948197276 
## 2            *N+t/s -3839.91358991296 
## 3              *N+k -3839.64807817019 
## 4              *N+b -3843.84019294074 
## 5              *N+d -3845.91415551674 
## 6              *N+g -3848.24712543703 
## 7  Faith-maN- other -3846.77745851803 
## 8    Faith-paN-RED- -3845.41540262222 
## 9    Faith-maN-RED- -3843.61460930535 
## 10   Faith-maN- adv -3843.43178633214 
## 11  Faith-paN- noun -3841.84813886068 
## 12   Faith-paN- res -3836.39266627683
best_WithMagri_pointwise[[8]][1] #log likelihood
## [1] -301.3

Add predicted probabilities to data frame:

for(i in 1:length(nas_sub$consonant)) {
  nas_sub$best_NoMagri_pointwise[i] <- as.numeric(as.character(best_NoMagri_pointwise$predictedProb[best_NoMagri_pointwise$input_C==nas_sub$consonant[i] & best_NoMagri_pointwise$input_morph==nas_sub$morph[i] & best_NoMagri_pointwise$output=="substituted"]))
}

for(i in 1:length(nas_sub$consonant)) {
  nas_sub$best_WithMagri_pointwise[i] <- as.numeric(as.character(best_WithMagri_pointwise$predictedProb[best_WithMagri_pointwise$input_C==nas_sub$consonant[i] & best_WithMagri_pointwise$input_morph==nas_sub$morph[i] & best_WithMagri_pointwise$output=="substituted"]))
}

Plot results of best grammar:

par(mar=c(5,5,2,6))
suppressWarnings(with(nas_sub, {
  interaction.plot(x.factor=consonant, 
  trace.factor=morph_unicode, 
  response=best_NoMagri_pointwise, #since NoMagri fit better in this case
  xlab="stem-initial consonant",ylab="nas. sub. rate, best StOT model,\nseparate constraint for each consonant"
  , trace.label="prefix construction", fixed=TRUE, type="b", pch=c(0,1,2,20,18,15))
}))

plot of chunk plotBestPointwiseStOT

#Since this will appear in the paper, also do a PNG file:
png(file="Tagalog_StOTNoMagri_pointwise_model.png",width=myResMultiplier*460,height=myResMultiplier*300, res=myResMultiplier*72,family=myFontFamily)
par(mar=c(5,5,2,0)+0.1)
with(nas_sub, {
  interaction.plot(x.factor=consonant, 
  trace.factor=morph_unicode, 
  response=best_NoMagri_pointwise,
  xlab="stem-initial consonant",ylab="nas. sub. rate, bestStOT model, \none constraint per consonant"
  , trace.label="prefix construction", fixed=TRUE, type="b", pch=c(0,1,2,20,18,15))
})
dev.off()
## pdf 
##   2

Pseudo-Hasse diagram for discussion of why StOT with original constraints doesn’t work

Create something that looks like a Hasse diagram, but where the vertical axis (ranking values) is to scale. We want these vertical values:

  • 106: *NC̥
  • 100: *[root ŋ
  • 94: UNIF-paŋ-nom.
  • 93: NasSub
  • 89: *[root m/n/ŋ and UNIF-maŋ-other

and then lines connecting each level (and two lines coming out of NasSub).

constraint_names <- c("*NC\U0325 (106)","*[ \U014B (100)","U\U0274\U026Aғ-pa\U014B-\U0274\U1D0F\U1D1C\U0274 (94)","N\U1D00sS\U1D1C\U0299 (93)","*[ m/n/\U014B (89)", "U\U0274\U026Aғ-ma\U014B-\U1D0F\U1D1B\U029C\U1D07\U0280 (89)")
x <- c(10,10,10,10,7,13) #horizontal positions
y <- c(106,100,94,93,89,89) #ranking values
vert_increment <- 0.4

plot(x, y, ylab="ranking value", type="n", xaxt="n", xlim=c(5,16), xlab="") #empty plot, no x axis ticks or labels
suppressWarnings(text(x, y, labels = constraint_names)) #add labels at the (x,y) coordinates given
segments(x[1],y[1]-vert_increment,x[2],y[2]+vert_increment) #add line segment from *NC to *[root N
segments(x[2],y[2]-vert_increment,x[3],y[3]+vert_increment) #from *[root N to Unif paN
segments(x[3],y[3]-vert_increment,x[4],y[4]+vert_increment) #from Unif to NasSub
segments(x[4],y[4]-vert_increment,x[5],y[5]+vert_increment) #from NasSub to *[root m/n/N
segments(x[4],y[4]-vert_increment,x[6],y[6]+vert_increment) #from NasSub to Unif maN

plot of chunk pseudoHasse

#write it to a file
png(file="Tagalog_PseudoHasse.png",width=myResMultiplier*300,height=myResMultiplier*460, res=myResMultiplier*72,family=myFontFamily)
par(mar=c(2,4,2,0)+0.1)
plot(x, y, ylab="ranking value", type="n", xaxt="n", xlim=c(5,15.5), xlab="") #empty plot, no x axis ticks or labels
text(x, y, labels = constraint_names) #add labels at the (x,y) coordinates given
segments(x[1],y[1]-vert_increment,x[2],y[2]+vert_increment) #add line segment from *NC to *[root N
segments(x[2],y[2]-vert_increment,x[3],y[3]+vert_increment) #from *[root N to Unif paN
segments(x[3],y[3]-vert_increment,x[4],y[4]+vert_increment) #from Unif to NasSub
segments(x[4],y[4]-vert_increment,x[5],y[5]+vert_increment) #from NasSub to *[root m/n/N
segments(x[4],y[4]-vert_increment,x[6],y[6]+vert_increment) #from NasSub to Unif maN
dev.off()
## pdf 
##   2

Log likelihood of each constraint model

Note that we already retrieved the log likelihood of the multiplicative model above. For reference, it is -1 * myOptimization$value, which comes out to -292.3. We also already got them for the best StOT and Stratal models.

Make a function for getting the log likelihood using a column from nas_sub:

#For NHG models, where we use simulation to get predicted probabilities, we could get some zeroes (or ones). Since we ran 1,000,000 trials, we back off by 1/1000000 by default.

getLogLike <- function(colName, backoff = 1/1000000) {
  log_likelihood <- 0 #initialize
  for(i in 1:length(nas_sub$consonant)) {
    sub_prob <- nas_sub[i,colName]
    if(sub_prob == 0) { #avoid logs of 0 or 1
      sub_prob <- sub_prob + backoff
    } else if(sub_prob == 1) {
      sub_prob <- sub_prob - backoff
    }
    log_likelihood <- log_likelihood + nas_sub$subst_count[i] * log(sub_prob) + nas_sub$unsubst_count[i] * log(1-sub_prob)
  }
  return(log_likelihood)
}

Now get them all (including ones we already got above, as a check that the function is correct). I do it both with the default backoff (1/1000000), appropriate for NHG, where the grammar was checked 1,000,000 times, and with the backoff used for picking the best StOT and Stratal grammars (1/100000, since those grammars were checked 100,000 times).

#multiplicative:
getLogLike("multiplicative_prediction") #-292.3079
## [1] -292.3
getLogLike("multiplicative_prediction", backoff=1/100000) #no 0s or 1s, so backoff doesn't matter
## [1] -292.3
# MaxEnt:
getLogLike("maxentPredictions") #-284.8209
## [1] -284.8
#NHG:
getLogLike("NHG_Predictions") #-293.0787
## [1] -294.5
getLogLike("NHG_Predictions", backoff=1/100000) #-293.0787: no 0s or 1s, so backoff doesn't matter
## [1] -294.5
#best StOT (no Magri):
getLogLike("best_NoMagri") #-381.3918
## [1] -381.4
getLogLike("best_NoMagri", backoff=1/100000) #-367.5775
## [1] -367.6
#best StOT (with Magri):
getLogLike("best_WithMagri") #-314.6419
## [1] -314.6
getLogLike("best_WithMagri", backoff=1/100000) #-314.6419: no 0s or 1s, so backoff doesn't matter
## [1] -314.6
#best Stratal
getLogLike("best_Stratal") #-738.9729
## [1] -739
getLogLike("best_Stratal", backoff=1/100000) #-645.725
## [1] -645.7

Baseline (“perfect”) model

For comparison, we also include a baseline model, with perfect frequency matching for each combination of prefix construction and consonant. It will still not fit the data perfectly: if it predicts substitution 60% of the time for a category whose observed substitution rate is 60%, it will still sometimes predict substitution when the datum is unsubstituted (0.6 * 0.4), and vice versa (0.4 * 0.6), for a total error rate of 48%. This provides a ceiling on how well any model (that makes the same distinctions ours do) could do.
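
A worked version of that arithmetic (not from the original script):

#Worked version of the error-rate arithmetic for a category with a 60% substitution rate
p <- 0.6
p * (1 - p) + (1 - p) * p #expected mismatch rate under perfect frequency matching: 0.48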

To do this, we just run the getLogLike() function again but for the observed rates:

getLogLike("subst_rate") #-254.6602
## [1] -254.7
getLogLike("subst_rate", backoff=1/100000) #-254.6639
## [1] -254.7