This is an R Markdown document. Markdown is a simple formatting syntax for authoring web pages.
If you are viewing this in a web browser, then an .Rmd file has been “knit” into a web page that includes the results of running embedded chunks of R code.
If you are viewing this in RStudio, then when you click the Knit HTML
button, a web page will be generated that includes both the content and the output of any embedded R code chunks within the document. You may need to use the Session
menu to set the working directory to Source file location.
This document accompanies one section of a manuscript by ANONYMOUS.
Set up graphical parameters and other preliminaries for the whole script
set.seed(123) #in case of any randomization
#load packages
#Install any package that is missing, then attach it with library(), which
#fails loudly if attachment fails (unlike require(), which just returns FALSE).
for (pkg in c("grid", "vcd", "lme4", "car", "multcomp", "stringr")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}
#set up plot parameters
myFontFamily <- "serif"
#The font family for base-graphics plots is the "family" graphical parameter
#("font" is an integer face index, so family is the right one to set here).
par(family = myFontFamily) #for most plots
#Unfortunately, this doesn't carry over when a .png file is written, so it is necessary to include
#family=myFontFamily
#as one of the arguments to the png() function
#Also unfortunately, this doesn't carry over to mosaic plots, which use gpar (and gpar can't be set like this for the whole script). Instead, we create this object:
gpSerif <- gpar(fontfamily = myFontFamily)
#and then each call to mosaic() has to include the line
#gp_varnames=gpSerif,gp_labels=gpSerif,
#Note that for adding the numbers to the tiles in the mosaic plots with mtext(), we just use
#, fontfamily=myFontFamily
#in the list of gpar() arguments
myResMultiplier <- 5 #default is 72 ppi; using this in every call to png() will make it 360
Enter words with nasal-substituting morphology from Leo English’s 1987 Tagalog-English Dictionary into a FileMaker file.
Extract counts of substituted, unsubstituted, and varying for each prefix type and each consonant.
Place these counts into a data frame:
#The raw data
consonant <- c("p","p","p","p","p","p","t/s","t/s","t/s","t/s","t/s","t/s","k","k","k","k","k","k","b","b","b","b","b","b","d","d","d","d","d","d","g","g","g","g","g","g")
morph <- c("maN- other", "paN-RED-", "maN-RED-", "maN- adv.", "paN- noun", "paN- res.","maN- other", "paN-RED-", "maN-RED-", "maN- adv.", "paN- noun", "paN- res.","maN- other", "paN-RED-", "maN-RED-", "maN- adv.", "paN- noun", "paN- res.","maN- other", "paN-RED-", "maN-RED-", "maN- adv.", "paN- noun", "paN- res.","maN- other", "paN-RED-", "maN-RED-", "maN- adv.", "paN- noun", "paN- res.","maN- other", "paN-RED-", "maN-RED-", "maN- adv.", "paN- noun", "paN- res.")
morph_unicode <- c("ma\U014B- other", "pa\U014B-\U0280\U1D07\U1D05-", "ma\U014B-\U0280\U1D07\U1D05-", "ma\U014B- adv", "pa\U014B- noun", "pa\U014B- res","ma\U014B- other", "pa\U014B-\U0280\U1D07\U1D05-", "ma\U014B-\U0280\U1D07\U1D05-", "ma\U014B- adv", "pa\U014B- noun", "pa\U014B- res","ma\U014B- other", "pa\U014B-\U0280\U1D07\U1D05-", "ma\U014B-\U0280\U1D07\U1D05-", "ma\U014B- adv", "pa\U014B- noun", "pa\U014B- res","ma\U014B- other", "pa\U014B-\U0280\U1D07\U1D05-", "ma\U014B-\U0280\U1D07\U1D05-", "ma\U014B- adv", "pa\U014B- noun", "pa\U014B- res","ma\U014B- other", "pa\U014B-\U0280\U1D07\U1D05-", "ma\U014B-\U0280\U1D07\U1D05-", "ma\U014B- adv", "pa\U014B- noun", "pa\U014B- res","ma\U014B- other", "pa\U014B-\U0280\U1D07\U1D05-", "ma\U014B-\U0280\U1D07\U1D05-", "ma\U014B- adv", "pa\U014B- noun", "pa\U014B- res")
subst_count <- c(65,39,18,11,30.5,4.5,71,63,44,51,60.5,5,74.5,25,20,11,9.5,2,66,29.5,16.5,6,15,1.5,7,3,1,0,1,0,0,1,0,0,0,0)
unsubst_count <-c(0,0,0,0,6.5,4.5,0,0,1,0,29.5,13,0.5,1,2,0,9.5,5,6,5.5,21.5,6,30,17.5,3,7,12,12,17,7,13,17,12,12,27,4)
#Combine into data frame
#Note: as of R 4.0, data.frame() no longer converts strings to factors, so the
#old trick of reordering with levels(nas_sub$x)[c(...)] would operate on NULL
#levels and turn every value into NA. Instead, set the level order explicitly
#(and locale-independently) when building the factors. The data vectors are
#already entered in the desired display order (p, t/s, k, b, d, g; maN- other,
#paN-RED-, maN-RED-, maN- adv., paN- noun, paN- res.), so unique() yields
#exactly the level order we want.
nas_sub <- data.frame(
  consonant = factor(consonant, levels = unique(consonant)),
  morph = factor(morph, levels = unique(morph)),
  morph_unicode = factor(morph_unicode, levels = unique(morph_unicode)),
  subst_count = subst_count,
  unsubst_count = unsubst_count
)
#Add column for substitution rate (proportion of substituted out of all tokens)
nas_sub$subst_rate <- nas_sub$subst_count / (nas_sub$subst_count + nas_sub$unsubst_count)
#Here's what the data frame looks like (not including the Unicode column):
nas_sub[,c(1,2,4,5,6)]
## consonant morph subst_count unsubst_count subst_rate
## 1 p maN- other 65.0 0.0 1.00000
## 2 p paN-RED- 39.0 0.0 1.00000
## 3 p maN-RED- 18.0 0.0 1.00000
## 4 p maN- adv. 11.0 0.0 1.00000
## 5 p paN- noun 30.5 6.5 0.82432
## 6 p paN- res. 4.5 4.5 0.50000
## 7 t/s maN- other 71.0 0.0 1.00000
## 8 t/s paN-RED- 63.0 0.0 1.00000
## 9 t/s maN-RED- 44.0 1.0 0.97778
## 10 t/s maN- adv. 51.0 0.0 1.00000
## 11 t/s paN- noun 60.5 29.5 0.67222
## 12 t/s paN- res. 5.0 13.0 0.27778
## 13 k maN- other 74.5 0.5 0.99333
## 14 k paN-RED- 25.0 1.0 0.96154
## 15 k maN-RED- 20.0 2.0 0.90909
## 16 k maN- adv. 11.0 0.0 1.00000
## 17 k paN- noun 9.5 9.5 0.50000
## 18 k paN- res. 2.0 5.0 0.28571
## 19 b maN- other 66.0 6.0 0.91667
## 20 b paN-RED- 29.5 5.5 0.84286
## 21 b maN-RED- 16.5 21.5 0.43421
## 22 b maN- adv. 6.0 6.0 0.50000
## 23 b paN- noun 15.0 30.0 0.33333
## 24 b paN- res. 1.5 17.5 0.07895
## 25 d maN- other 7.0 3.0 0.70000
## 26 d paN-RED- 3.0 7.0 0.30000
## 27 d maN-RED- 1.0 12.0 0.07692
## 28 d maN- adv. 0.0 12.0 0.00000
## 29 d paN- noun 1.0 17.0 0.05556
## 30 d paN- res. 0.0 7.0 0.00000
## 31 g maN- other 0.0 13.0 0.00000
## 32 g paN-RED- 1.0 17.0 0.05556
## 33 g maN-RED- 0.0 12.0 0.00000
## 34 g maN- adv. 0.0 12.0 0.00000
## 35 g paN- noun 0.0 27.0 0.00000
## 36 g paN- res. 0.0 4.0 0.00000
All morphology lumped together — note that this includes constructions other than the 6 in nas_sub
, so total counts are higher — data separated by consonant:
#make an object with all the data in it
all_Cs<-structure(c(10,40,17,100,70,97, 253,430,185,177,25,1),
.Dim=as.integer(c(6,2)),
.Dimnames=structure(list(Stem.initial.obstruent=
c("p", "t/s", "k", "b", "d", "g"),
Subst.behavior=c("unsubst","subst")),
.Names=c("Stem-initial consonant",
"Behavior according to dictionary")),
class="table")
all_Cs
## Behavior according to dictionary
## Stem-initial consonant unsubst subst
## p 10 253
## t/s 40 430
## k 17 185
## b 100 177
## d 70 25
## g 97 1
#Here's the mosaic plot with no numbers on the tiles. A nicer version is being written to a PNG file next:
mosaic(all_Cs,
       direction = "v",
       pop = FALSE,
       gp_varnames = gpSerif,
       gp_labels = gpSerif,
       gp = gpar(fill = c("white", "black")),
       labeling_args = list(
         rot_labels = c(left = 0),
         offset_varnames = c(left = 1.5),
         offset_labels = c(left = 0.5)
       ))
#set up PNG file
#family= must be passed to png() directly, because par() font settings do not
#carry over to the png device.
png(file="Tagalog_mosaic_plot_by_consonant.png",
    width=myResMultiplier*600, height=myResMultiplier*350,
    res=myResMultiplier*72, family=myFontFamily)
par(mar=c(5,5,2,1)+0.1)
#make a basic mosaic plot, but with no numbers in the tiles
mosaic(all_Cs,direction="v",pop=FALSE,
gp_varnames=gpSerif,gp_labels=gpSerif,
gp=gpar(fill=c("white","black"))
,labeling_args=list(
rot_labels=c(left=0)
,offset_varnames=c(left=1.5),offset_labels=c(left=0.5)
))
#helper: write one cell's count into its mosaic tile (skipped when the count is 0)
add_consonant_count <- function(tab, cons, behavior, text_col) {
  n <- tab[cons, behavior]
  if (n > 0) {
    seekViewport(paste0("cell:Stem-initial consonant=", cons,
                        ",Behavior according to dictionary=", behavior))
    grid.text(n, gp=gpar(col=text_col, fontfamily=myFontFamily))
  }
}
##numbers for unsubstituted: black text on the white tiles
for (cons in rownames(all_Cs)) {
  add_consonant_count(all_Cs, cons, "unsubst", "black")
}
##numbers for substituted: white text on the black tiles
#Suppressing the number 1 for g, because it doesn't quite fit in the tile. In the paper, there's a textbox next to the figure with the number 1 instead.
for (cons in setdiff(rownames(all_Cs), "g")) {
  add_consonant_count(all_Cs, cons, "subst", "white")
}
dev.off()
## pdf
## 2
All consonants lumped together, data separated by morphology. It would be elegant to get the counts from a
table as above, but we already had code with the numbers hard-coded in:
#set up the data
#Counts of unsubstituted / varying / substituted stems for each prefixing
#construction, all consonants pooled.
#unfortunately, mosaic() can't handle Unicode in dimension names
all_prefixes <- as.table(matrix(
  c(26, 37, 53, 48, 116, 51,    # unsubstituted, one count per construction
    1, 1, 3, 0, 51, 10,         # varying
    317, 177, 105, 83, 97, 8),  # substituted
  nrow = 6, ncol = 3,
  dimnames = list(
    "Prefixing construction" = c("maN- other", "paN-RED-", "maN-RED-",
                                 "maN- adv", "paN- noun", "paN- res"),
    "Behavior according to dictionary" = c("unsubst", "vary", "subst")
  )
))
all_prefixes
## Behavior according to dictionary
## Prefixing construction unsubst vary subst
## maN- other 26 1 317
## paN-RED- 37 1 177
## maN-RED- 53 3 105
## maN- adv 48 0 83
## paN- noun 116 51 97
## paN- res 51 10 8
#a quick version of the plot, with no numbers written on the tiles
par(mar = c(5, 5, 2, 1) + 0.1)
mosaic(all_prefixes,
       direction = "v",
       pop = FALSE,
       gp_varnames = gpSerif,
       gp_labels = gpSerif,
       gp = gpar(fill = c("white", "grey", "black")),
       labeling_args = list(
         rot_labels = c(left = 0, top = 60),
         offset_varnames = c(left = 1.5, top = 2.5),
         offset_labels = c(left = 0.5, top = 1)
       ),
       margins = c(top = 5))
#a nicer version, printed to png
#family= must be passed to png() directly, because par() font settings do not
#carry over to the png device.
png(file="Tagalog_mosaic_plot_by_morphology.png",
    width=myResMultiplier*600, height=myResMultiplier*350,
    res=myResMultiplier*72, family=myFontFamily)
par(mar=c(5,5,2,1)+0.1)
mosaic(all_prefixes,direction="v",pop=FALSE,
gp_varnames=gpSerif,gp_labels=gpSerif,
gp=gpar(fill=c("white","grey","black"))
,labeling_args=list(
rot_labels=c(left=0,top=60)
,offset_varnames=c(left=1.5,top=2.5),offset_labels=c(left=0.5,top=1)
)
,margins=c(top=5))
#helper: write one cell's count into its mosaic tile (skipped when the count is 0)
add_prefix_count <- function(tab, pre, behavior, text_col) {
  n <- tab[pre, behavior]
  if (n > 0) {
    seekViewport(paste0("cell:Prefixing construction=", pre,
                        ",Behavior according to dictionary=", behavior))
    grid.text(n, gp=gpar(col=text_col, fontfamily=myFontFamily))
  }
}
#black text on the white (unsubst) and grey (vary) tiles
for (behavior in c("unsubst", "vary")) {
  for (pre in rownames(all_prefixes)) {
    add_prefix_count(all_prefixes, pre, behavior, "black")
  }
}
#white text on the black (subst) tiles
for (pre in rownames(all_prefixes)) {
  add_prefix_count(all_prefixes, pre, "subst", "white")
}
#windows.options(width=5.6, height=4.48)
dev.off()
## pdf
## 2
Each affix as separate plots, comparing Cs (does not appear in paper):
#Counts for the paN-RED- construction, split by stem-initial obstruent.
paN_RED <- as.table(matrix(
  c(0, 0, 0, 1, 5, 7, 17, 7,       # unsubstituted, one count per obstruent
    0, 0, 0, 0, 1, 0, 0, 0,        # varying
    39, 22, 41, 25, 29, 3, 1, 17), # substituted
  nrow = 8, ncol = 3,
  dimnames = list(
    "Stem-initial obstruent" = c("p", "t", "s", "k", "b", "d", "g", "glottal"),
    "Behavior according to dictionary" = c("unsubst", "vary", "subst")
  )
))
paN_RED
## Behavior according to dictionary
## Stem-initial obstruent unsubst vary subst
## p 0 0 39
## t 0 0 22
## s 0 0 41
## k 1 0 25
## b 5 1 29
## d 7 0 3
## g 17 0 1
## glottal 7 0 17
mosaic(paN_RED,direction="v",pop=FALSE,
gp=gpar(fill=c("white","grey","black"))
,labeling_args=list(
rot_labels=c(left=0)
,offset_varnames=c(left=1.5),offset_labels=c(left=0.5)
))
#Write each nonzero count into its tile: black text on the white/grey
#(unsubst/vary) tiles, white text on the black (subst) tiles.
#(Defined locally so this chunk stands alone; redefinition elsewhere is harmless.)
label_obstruent_tiles <- function(tab) {
  for (behavior in colnames(tab)) {
    text_col <- if (behavior == "subst") "white" else "black"
    for (obstruent in rownames(tab)) {
      n <- tab[obstruent, behavior]
      if (n > 0) {
        seekViewport(paste0("cell:Stem-initial obstruent=", obstruent,
                            ",Behavior according to dictionary=", behavior))
        grid.text(n, gp = gpar(col = text_col))
      }
    }
  }
}
label_obstruent_tiles(paN_RED)
#windows.options() exists only on Windows builds of R; guard it so the script
#also runs on other platforms.
if (exists("windows.options")) windows.options(width=5.6, height=4.48)
#Counts for the maN-RED- construction, split by stem-initial obstruent.
maN_RED <- as.table(matrix(
  c(0, 0, 1, 2, 20, 12, 12, 6,    # unsubstituted, one count per obstruent
    0, 0, 0, 0, 3, 0, 0, 0,       # varying
    18, 19, 25, 20, 15, 1, 0, 7), # substituted
  nrow = 8, ncol = 3,
  dimnames = list(
    "Stem-initial obstruent" = c("p", "t", "s", "k", "b", "d", "g", "glottal"),
    "Behavior according to dictionary" = c("unsubst", "vary", "subst")
  )
))
maN_RED
## Behavior according to dictionary
## Stem-initial obstruent unsubst vary subst
## p 0 0 18
## t 0 0 19
## s 1 0 25
## k 2 0 20
## b 20 3 15
## d 12 0 1
## g 12 0 0
## glottal 6 0 7
mosaic(maN_RED,direction="v",pop=FALSE,
gp=gpar(fill=c("white","grey","black"))
,labeling_args=list(
rot_labels=c(left=0)
,offset_varnames=c(left=1.5),offset_labels=c(left=0.5)
))
#Write each nonzero count into its tile: black text on the white/grey
#(unsubst/vary) tiles, white text on the black (subst) tiles.
#(Defined locally so this chunk stands alone; redefinition elsewhere is harmless.)
label_obstruent_tiles <- function(tab) {
  for (behavior in colnames(tab)) {
    text_col <- if (behavior == "subst") "white" else "black"
    for (obstruent in rownames(tab)) {
      n <- tab[obstruent, behavior]
      if (n > 0) {
        seekViewport(paste0("cell:Stem-initial obstruent=", obstruent,
                            ",Behavior according to dictionary=", behavior))
        grid.text(n, gp = gpar(col = text_col))
      }
    }
  }
}
label_obstruent_tiles(maN_RED)
#Counts for the maN- adverbial construction, split by stem-initial obstruent.
maN_adv <- as.table(matrix(
  c(0, 0, 0, 0, 6, 12, 9, 21,    # unsubstituted, one count per obstruent
    0, 0, 0, 0, 0, 0, 0, 0,      # varying
    11, 12, 39, 11, 6, 0, 0, 4), # substituted
  nrow = 8, ncol = 3,
  dimnames = list(
    "Stem-initial obstruent" = c("p", "t", "s", "k", "b", "d", "g", "glottal"),
    "Behavior according to dictionary" = c("unsubst", "vary", "subst")
  )
))
maN_adv
## Behavior according to dictionary
## Stem-initial obstruent unsubst vary subst
## p 0 0 11
## t 0 0 12
## s 0 0 39
## k 0 0 11
## b 6 0 6
## d 12 0 0
## g 9 0 0
## glottal 21 0 4
mosaic(maN_adv,direction="v",pop=FALSE,
gp=gpar(fill=c("white","grey","black"))
,labeling_args=list(
rot_labels=c(left=0)
,offset_varnames=c(left=1.5),offset_labels=c(left=0.5)
))
#Write each nonzero count into its tile: black text on the white/grey
#(unsubst/vary) tiles, white text on the black (subst) tiles.
#(Defined locally so this chunk stands alone; redefinition elsewhere is harmless.)
label_obstruent_tiles <- function(tab) {
  for (behavior in colnames(tab)) {
    text_col <- if (behavior == "subst") "white" else "black"
    for (obstruent in rownames(tab)) {
      n <- tab[obstruent, behavior]
      if (n > 0) {
        seekViewport(paste0("cell:Stem-initial obstruent=", obstruent,
                            ",Behavior according to dictionary=", behavior))
        grid.text(n, gp = gpar(col = text_col))
      }
    }
  }
}
label_obstruent_tiles(maN_adv)
#Counts for the remaining maN- constructions, split by stem-initial obstruent.
maN_other <- as.table(matrix(
  c(0, 0, 0, 0, 6, 3, 13, 4,      # unsubstituted, one count per obstruent
    0, 0, 0, 1, 0, 0, 0, 0,       # varying
    65, 39, 32, 74, 66, 7, 0, 34),# substituted
  nrow = 8, ncol = 3,
  dimnames = list(
    "Stem-initial obstruent" = c("p", "t", "s", "k", "b", "d", "g", "glottal"),
    "Behavior according to dictionary" = c("unsubst", "vary", "subst")
  )
))
maN_other
## Behavior according to dictionary
## Stem-initial obstruent unsubst vary subst
## p 0 0 65
## t 0 0 39
## s 0 0 32
## k 0 1 74
## b 6 0 66
## d 3 0 7
## g 13 0 0
## glottal 4 0 34
mosaic(maN_other,direction="v",pop=FALSE,
gp=gpar(fill=c("white","grey","black"))
,labeling_args=list(
rot_labels=c(left=0)
,gp_labels=gpar(fontsize=10)
,offset_varnames=c(left=1.5),offset_labels=c(left=0.5)
))
#Write each nonzero count into its tile: black text on the white/grey
#(unsubst/vary) tiles, white text on the black (subst) tiles.
#(Defined locally so this chunk stands alone; redefinition elsewhere is harmless.)
label_obstruent_tiles <- function(tab) {
  for (behavior in colnames(tab)) {
    text_col <- if (behavior == "subst") "white" else "black"
    for (obstruent in rownames(tab)) {
      n <- tab[obstruent, behavior]
      if (n > 0) {
        seekViewport(paste0("cell:Stem-initial obstruent=", obstruent,
                            ",Behavior according to dictionary=", behavior))
        grid.text(n, gp = gpar(col = text_col))
      }
    }
  }
}
label_obstruent_tiles(maN_other)
#Dictionary-behavior counts (unsubstituted / varying / substituted) for
#paN- nouns, by stem-initial obstruent. Built as a "table" so that mosaic()
#picks up the long dimension names for its axis labels.
paN_noun <- matrix(
  c( 3,  8,  6, 7, 26, 17, 27, 22,  #unsubst
     7, 18, 13, 5,  8,  0,  0,  0,  #vary
    27, 25, 20, 7, 11,  1,  0,  6), #subst
  nrow = 8,
  dimnames = list(
    "Stem-initial obstruent" = c("p","t","s","k","b","d","g","glottal"),
    "Behavior according to dictionary" = c("unsubst","vary","subst")
  )
)
class(paN_noun) <- "table"
paN_noun
## Behavior according to dictionary
## Stem-initial obstruent unsubst vary subst
## p 3 7 27
## t 8 18 25
## s 6 13 20
## k 7 5 7
## b 26 8 11
## d 17 0 1
## g 27 0 0
## glottal 22 0 6
#Draw the mosaic for paN- nouns; pop=FALSE keeps each cell's grid viewport
#alive so the counts can be written into the cells afterwards.
mosaic(paN_noun,direction="v",pop=FALSE,
       gp=gpar(fill=c("white","grey","black")),
       labeling_args=list(rot_labels=c(left=0),
                          offset_varnames=c(left=1.5),
                          offset_labels=c(left=0.5)))
#Write each non-zero count into its cell: black text on the white (unsubst)
#and grey (vary) columns, white text on the black (subst) column. The
#viewport names follow vcd's "cell:<varname>=<level>,..." convention.
for (behavior in colnames(paN_noun)) {
  textColor <- if (behavior == "subst") "white" else "black"
  for (obstruent in rownames(paN_noun)) {
    seekViewport(paste0("cell:Stem-initial obstruent=", obstruent,
                        ",Behavior according to dictionary=", behavior))
    if (paN_noun[obstruent, behavior] > 0) {
      grid.text(paN_noun[obstruent, behavior], gp = gpar(col = textColor))
    }
  }
}
#Dictionary-behavior counts (unsubstituted / varying / substituted) for
#paN- reservational forms, by stem-initial obstruent; same layout as paN_noun.
paN_res <- matrix(
  c( 3, 5, 5, 5, 17, 7, 4, 5,  #unsubst
     3, 3, 3, 0,  1, 0, 0, 0,  #vary
     3, 2, 0, 2,  1, 0, 0, 0), #subst
  nrow = 8,
  dimnames = list(
    "Stem-initial obstruent" = c("p","t","s","k","b","d","g","glottal"),
    "Behavior according to dictionary" = c("unsubst","vary","subst")
  )
)
class(paN_res) <- "table"
paN_res
## Behavior according to dictionary
## Stem-initial obstruent unsubst vary subst
## p 3 3 3
## t 5 3 2
## s 5 3 0
## k 5 0 2
## b 17 1 1
## d 7 0 0
## g 4 0 0
## glottal 5 0 0
#Draw the mosaic for paN- res.; pop=FALSE keeps each cell's grid viewport
#alive so the counts can be written into the cells afterwards.
mosaic(paN_res,direction="v",pop=FALSE,
       gp=gpar(fill=c("white","grey","black")),
       labeling_args=list(rot_labels=c(left=0),
                          offset_varnames=c(left=1.5),
                          offset_labels=c(left=0.5)))
#Write each non-zero count into its cell: black text on the white (unsubst)
#and grey (vary) columns, white text on the black (subst) column. The
#viewport names follow vcd's "cell:<varname>=<level>,..." convention.
for (behavior in colnames(paN_res)) {
  textColor <- if (behavior == "subst") "white" else "black"
  for (obstruent in rownames(paN_res)) {
    seekViewport(paste0("cell:Stem-initial obstruent=", obstruent,
                        ",Behavior according to dictionary=", behavior))
    if (paN_res[obstruent, behavior] > 0) {
      grid.text(paN_res[obstruent, behavior], gp = gpar(col = textColor))
    }
  }
}
#Observed rate of nasal substitution by stem-initial consonant, one trace per
#prefix construction. Wide right margin leaves room for the trace legend.
#NOTE(review): warnings are suppressed here -- presumably from missing
#consonant x morph combinations in nas_sub; confirm nothing else is masked.
par(mar=c(5,5,2,6))
suppressWarnings(with(nas_sub, {
interaction.plot(x.factor=consonant,
trace.factor=morph_unicode,
response=subst_rate,
xlab="stem-initial consonant",ylab="rate of nasal substitution"
, trace.label="prefix construction", fixed=TRUE, type="b", pch=c(0,1,2,20,18,15))
}))
#Since this will appear in the paper, also do a PNG file:
#(family= is needed here because par(font=...) set at the top of the script
#does not carry over to png devices; myResMultiplier scales for print quality)
png(file="Tagalog_interaction_plot.png",width=myResMultiplier*460,height=myResMultiplier*300,res=myResMultiplier*72, family=myFontFamily)
par(mar=c(5,4,2,0)+0.1)
with(nas_sub, {
interaction.plot(x.factor=consonant,
trace.factor=morph_unicode,
response=subst_rate,
xlab="stem-initial consonant",ylab="rate of nasal substitution"
, trace.label="prefix construction", fixed=TRUE, type="b", pch=c(0,1,2,20,18,15))
})
dev.off()
## pdf
## 2
This isn’t used in the paper, but just for checking that consonant and morphology are in fact significant influences:
#Binomial GLM of substituted vs. unsubstituted counts by consonant and prefix
#construction; the response is a two-column successes/failures matrix.
nas_sub.glm <- glm(cbind(subst_count, unsubst_count) ~ consonant + morph, data=nas_sub, family=binomial()) #will throw a warning because of the 0.5 values
## Warning: non-integer counts in a binomial glm!
summary(nas_sub.glm)
##
## Call:
## glm(formula = cbind(subst_count, unsubst_count) ~ consonant +
## morph, family = binomial(), data = nas_sub)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8187 -0.4713 -0.0498 0.8885 2.4389
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.999 0.544 11.03 < 2e-16 ***
## consonantt/s -0.786 0.402 -1.95 0.0507 .
## consonantk -1.368 0.479 -2.86 0.0043 **
## consonantb -3.259 0.434 -7.52 5.6e-14 ***
## consonantd -5.766 0.568 -10.15 < 2e-16 ***
## consonantg -9.113 1.117 -8.16 3.4e-16 ***
## morphpaN-RED- -0.869 0.492 -1.77 0.0775 .
## morphmaN- adv. -2.146 0.517 -4.15 3.3e-05 ***
## morphmaN-RED- -2.648 0.444 -5.97 2.4e-09 ***
## morphpaN- noun -4.248 0.438 -9.69 < 2e-16 ***
## morphpaN- res. -5.910 0.546 -10.82 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 753.400 on 35 degrees of freedom
## Residual deviance: 33.584 on 25 degrees of freedom
## AIC: 116.7
##
## Number of Fisher Scoring iterations: 6
#Type II likelihood-ratio tests for the two predictors (car::Anova); the
#non-integer warnings repeat because the model refits with each term dropped
Anova(nas_sub.glm)
## Warning: non-integer #successes in a binomial glm!
## Warning: non-integer #successes in a binomial glm!
## Analysis of Deviance Table (Type II tests)
##
## Response: cbind(subst_count, unsubst_count)
## LR Chisq Df Pr(>Chisq)
## consonant 493 5 <2e-16 ***
## morph 283 5 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Which Cs are significantly different?
#Simultaneous (Tukey-style) pairwise comparisons of the consonant levels,
#using multcomp::glht on the fitted GLM
nas_sub.glht.C <- glht(nas_sub.glm, linfct = mcp(consonant = "Tukey"))
summary(nas_sub.glht.C) #I.e., all are different except p vs. t/s, t/s vs. k
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: glm(formula = cbind(subst_count, unsubst_count) ~ consonant +
## morph, family = binomial(), data = nas_sub)
##
## Linear Hypotheses:
## Estimate Std. Error z value Pr(>|z|)
## t/s - p == 0 -0.786 0.402 -1.95 0.337
## k - p == 0 -1.368 0.479 -2.86 0.041 *
## b - p == 0 -3.259 0.434 -7.52 <0.001 ***
## d - p == 0 -5.766 0.568 -10.15 <0.001 ***
## g - p == 0 -9.113 1.117 -8.16 <0.001 ***
## k - t/s == 0 -0.583 0.370 -1.57 0.583
## b - t/s == 0 -2.474 0.303 -8.18 <0.001 ***
## d - t/s == 0 -4.980 0.474 -10.50 <0.001 ***
## g - t/s == 0 -8.327 1.073 -7.76 <0.001 ***
## b - k == 0 -1.891 0.376 -5.03 <0.001 ***
## d - k == 0 -4.398 0.519 -8.48 <0.001 ***
## g - k == 0 -7.745 1.092 -7.09 <0.001 ***
## d - b == 0 -2.507 0.432 -5.80 <0.001 ***
## g - b == 0 -5.854 1.051 -5.57 <0.001 ***
## g - d == 0 -3.347 1.084 -3.09 0.021 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
Which prefixes are significantly different?
#Simultaneous (Tukey-style) pairwise comparisons of the prefix constructions
nas_sub.glht.morph <- glht(nas_sub.glm, linfct = mcp(morph = "Tukey"))
summary(nas_sub.glht.morph) #I.e., all are different except paN-RED vs. maN- other, maN- adv vs. paN-RED-, maN- adv vs. maN-RED-
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: glm(formula = cbind(subst_count, unsubst_count) ~ consonant +
## morph, family = binomial(), data = nas_sub)
##
## Linear Hypotheses:
## Estimate Std. Error z value Pr(>|z|)
## paN-RED- - maN- other == 0 -0.869 0.492 -1.77 0.48
## maN- adv. - maN- other == 0 -2.146 0.517 -4.15 <0.001 ***
## maN-RED- - maN- other == 0 -2.648 0.444 -5.97 <0.001 ***
## paN- noun - maN- other == 0 -4.248 0.438 -9.69 <0.001 ***
## paN- res. - maN- other == 0 -5.910 0.546 -10.82 <0.001 ***
## maN- adv. - paN-RED- == 0 -1.277 0.514 -2.49 0.12
## maN-RED- - paN-RED- == 0 -1.779 0.440 -4.05 <0.001 ***
## paN- noun - paN-RED- == 0 -3.380 0.431 -7.85 <0.001 ***
## paN- res. - paN-RED- == 0 -5.042 0.539 -9.35 <0.001 ***
## maN-RED- - maN- adv. == 0 -0.502 0.449 -1.12 0.87
## paN- noun - maN- adv. == 0 -2.103 0.423 -4.97 <0.001 ***
## paN- res. - maN- adv. == 0 -3.765 0.531 -7.09 <0.001 ***
## paN- noun - maN-RED- == 0 -1.600 0.325 -4.92 <0.001 ***
## paN- res. - maN-RED- == 0 -3.263 0.458 -7.12 <0.001 ***
## paN- res. - paN- noun == 0 -1.662 0.393 -4.23 <0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
Another way to do it is to use the weights
argument for “case weights” (see https://stat.ethz.ch/pipermail/r-help/2007-March/126891.html), but it requires separate lines for subst. and unsubst:
#The following could be made from nas_sub, which would be more elegant than reading it in from a file:
#Columns (per the printout below): C, voice, place, morph, outcome
#("0unsub"/"vary"/"1sub"), and freq (the count, used as case weights below)
tag <- read.table("TagalogRatesTable.txt", header=TRUE, sep="\t")
tag #varying words are treated as separate type
## C voice place morph outcome freq
## 1 p voiceless lab paN_RED 0unsub 0
## 2 p voiceless lab paN_RED vary 0
## 3 p voiceless lab paN_RED 1sub 39
## 4 t voiceless cor paN_RED 0unsub 0
## 5 t voiceless cor paN_RED vary 0
## 6 t voiceless cor paN_RED 1sub 22
## 7 s voiceless cor paN_RED 0unsub 0
## 8 s voiceless cor paN_RED vary 0
## 9 s voiceless cor paN_RED 1sub 41
## 10 k voiceless vel paN_RED 0unsub 1
## 11 k voiceless vel paN_RED vary 0
## 12 k voiceless vel paN_RED 1sub 25
## 13 b voiced lab paN_RED 0unsub 5
## 14 b voiced lab paN_RED vary 1
## 15 b voiced lab paN_RED 1sub 29
## 16 d voiced cor paN_RED 0unsub 7
## 17 d voiced cor paN_RED vary 0
## 18 d voiced cor paN_RED 1sub 3
## 19 g voiced vel paN_RED 0unsub 17
## 20 g voiced vel paN_RED vary 0
## 21 g voiced vel paN_RED 1sub 1
## 22 glott <NA> <NA> paN_RED 0unsub 7
## 23 glott <NA> <NA> paN_RED vary 0
## 24 glott <NA> <NA> paN_RED 1sub 17
## 25 p voiceless lab maN_RED 0unsub 0
## 26 p voiceless lab maN_RED vary 0
## 27 p voiceless lab maN_RED 1sub 18
## 28 t voiceless cor maN_RED 0unsub 0
## 29 t voiceless cor maN_RED vary 0
## 30 t voiceless cor maN_RED 1sub 19
## 31 s voiceless cor maN_RED 0unsub 1
## 32 s voiceless cor maN_RED vary 0
## 33 s voiceless cor maN_RED 1sub 25
## 34 k voiceless vel maN_RED 0unsub 2
## 35 k voiceless vel maN_RED vary 0
## 36 k voiceless vel maN_RED 1sub 20
## 37 b voiced lab maN_RED 0unsub 20
## 38 b voiced lab maN_RED vary 3
## 39 b voiced lab maN_RED 1sub 15
## 40 d voiced cor maN_RED 0unsub 12
## 41 d voiced cor maN_RED vary 0
## 42 d voiced cor maN_RED 1sub 1
## 43 g voiced vel maN_RED 0unsub 12
## 44 g voiced vel maN_RED vary 0
## 45 g voiced vel maN_RED 1sub 0
## 46 glott <NA> <NA> maN_RED 0unsub 6
## 47 glott <NA> <NA> maN_RED vary 0
## 48 glott <NA> <NA> maN_RED 1sub 7
## 49 p voiceless lab maN_adv 0unsub 0
## 50 p voiceless lab maN_adv vary 0
## 51 p voiceless lab maN_adv 1sub 11
## 52 t voiceless cor maN_adv 0unsub 0
## 53 t voiceless cor maN_adv vary 0
## 54 t voiceless cor maN_adv 1sub 12
## 55 s voiceless cor maN_adv 0unsub 0
## 56 s voiceless cor maN_adv vary 0
## 57 s voiceless cor maN_adv 1sub 39
## 58 k voiceless vel maN_adv 0unsub 0
## 59 k voiceless vel maN_adv vary 0
## 60 k voiceless vel maN_adv 1sub 11
## 61 b voiced lab maN_adv 0unsub 6
## 62 b voiced lab maN_adv vary 0
## 63 b voiced lab maN_adv 1sub 6
## 64 d voiced cor maN_adv 0unsub 12
## 65 d voiced cor maN_adv vary 0
## 66 d voiced cor maN_adv 1sub 0
## 67 g voiced vel maN_adv 0unsub 9
## 68 g voiced vel maN_adv vary 0
## 69 g voiced vel maN_adv 1sub 0
## 70 glott <NA> <NA> maN_adv 0unsub 21
## 71 glott <NA> <NA> maN_adv vary 0
## 72 glott <NA> <NA> maN_adv 1sub 4
## 73 p voiceless lab maN_other 0unsub 0
## 74 p voiceless lab maN_other vary 0
## 75 p voiceless lab maN_other 1sub 65
## 76 t voiceless cor maN_other 0unsub 0
## 77 t voiceless cor maN_other vary 0
## 78 t voiceless cor maN_other 1sub 39
## 79 s voiceless cor maN_other 0unsub 0
## 80 s voiceless cor maN_other vary 0
## 81 s voiceless cor maN_other 1sub 32
## 82 k voiceless vel maN_other 0unsub 0
## 83 k voiceless vel maN_other vary 1
## 84 k voiceless vel maN_other 1sub 74
## 85 b voiced lab maN_other 0unsub 6
## 86 b voiced lab maN_other vary 0
## 87 b voiced lab maN_other 1sub 66
## 88 d voiced cor maN_other 0unsub 3
## 89 d voiced cor maN_other vary 0
## 90 d voiced cor maN_other 1sub 7
## 91 g voiced vel maN_other 0unsub 13
## 92 g voiced vel maN_other vary 0
## 93 g voiced vel maN_other 1sub 0
## 94 glott <NA> <NA> maN_other 0unsub 4
## 95 glott <NA> <NA> maN_other vary 0
## 96 glott <NA> <NA> maN_other 1sub 34
## 97 p voiceless lab paN_nom 0unsub 3
## 98 p voiceless lab paN_nom vary 7
## 99 p voiceless lab paN_nom 1sub 27
## 100 t voiceless cor paN_nom 0unsub 8
## 101 t voiceless cor paN_nom vary 18
## 102 t voiceless cor paN_nom 1sub 25
## 103 s voiceless cor paN_nom 0unsub 6
## 104 s voiceless cor paN_nom vary 13
## 105 s voiceless cor paN_nom 1sub 20
## 106 k voiceless vel paN_nom 0unsub 7
## 107 k voiceless vel paN_nom vary 5
## 108 k voiceless vel paN_nom 1sub 7
## 109 b voiced lab paN_nom 0unsub 26
## 110 b voiced lab paN_nom vary 8
## 111 b voiced lab paN_nom 1sub 11
## 112 d voiced cor paN_nom 0unsub 17
## 113 d voiced cor paN_nom vary 0
## 114 d voiced cor paN_nom 1sub 1
## 115 g voiced vel paN_nom 0unsub 27
## 116 g voiced vel paN_nom vary 0
## 117 g voiced vel paN_nom 1sub 0
## 118 glott <NA> <NA> paN_nom 0unsub 22
## 119 glott <NA> <NA> paN_nom vary 0
## 120 glott <NA> <NA> paN_nom 1sub 6
## 121 p voiceless lab paN_res 0unsub 3
## 122 p voiceless lab paN_res vary 3
## 123 p voiceless lab paN_res 1sub 3
## 124 t voiceless cor paN_res 0unsub 5
## 125 t voiceless cor paN_res vary 3
## 126 t voiceless cor paN_res 1sub 2
## 127 s voiceless cor paN_res 0unsub 5
## 128 s voiceless cor paN_res vary 3
## 129 s voiceless cor paN_res 1sub 0
## 130 k voiceless vel paN_res 0unsub 5
## 131 k voiceless vel paN_res vary 0
## 132 k voiceless vel paN_res 1sub 2
## 133 b voiced lab paN_res 0unsub 17
## 134 b voiced lab paN_res vary 1
## 135 b voiced lab paN_res 1sub 1
## 136 d voiced cor paN_res 0unsub 7
## 137 d voiced cor paN_res vary 0
## 138 d voiced cor paN_res 1sub 0
## 139 g voiced vel paN_res 0unsub 4
## 140 g voiced vel paN_res vary 0
## 141 g voiced vel paN_res 1sub 0
## 142 glott <NA> <NA> paN_res 0unsub 5
## 143 glott <NA> <NA> paN_res vary 0
## 144 glott <NA> <NA> paN_res 1sub 0
#Binomial GLM with outcome as a factor response and "case weights" from the
#freq column (one row per C x morph x outcome combination; see the r-help
#link above).
#NOTE(review): the subset condition compares against "var", but the outcome
#levels in tag are "0unsub", "vary", and "1sub" -- so this filter removes
#nothing, and rows with outcome "vary" (a non-first factor level) are coded
#as successes by the binomial factor-response convention. The recorded output
#below was produced with this code as-is; TODO confirm whether the intent was
#tag$outcome != "vary".
tag.glm <- glm(outcome ~ morph+voice+place, weights = freq,
data = subset(tag, tag$outcome != "var"), family = binomial)
summary(tag.glm)
##
## Call:
## glm(formula = outcome ~ morph + voice + place, family = binomial,
## data = subset(tag, tag$outcome != "var"), weights = freq)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -5.876 0.000 0.000 0.889 5.290
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.962 0.458 -4.29 1.8e-05 ***
## morphmaN_other 2.076 0.485 4.28 1.8e-05 ***
## morphmaN_RED -0.312 0.452 -0.69 0.4908
## morphpaN_nom -1.361 0.430 -3.16 0.0016 **
## morphpaN_RED 1.245 0.492 2.53 0.0114 *
## morphpaN_res -3.617 0.555 -6.52 7.1e-11 ***
## voicevoiceless 5.358 0.371 14.46 < 2e-16 ***
## placelab 2.380 0.346 6.88 6.0e-12 ***
## placevel -1.411 0.326 -4.33 1.5e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1189.12 on 74 degrees of freedom
## Residual deviance: 504.25 on 66 degrees of freedom
## (18 observations deleted due to missingness)
## AIC: 522.3
##
## Number of Fisher Scoring iterations: 5
#Which affixes are pairwise different?
#Tukey-style simultaneous comparisons of the morph levels in the case-weight model
summary(glht(tag.glm, linfct = mcp(morph = "Tukey"))) #all but maN-RED- vs. maN- adv, paN-RED- vs. maN- adv
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: glm(formula = outcome ~ morph + voice + place, family = binomial,
## data = subset(tag, tag$outcome != "var"), weights = freq)
##
## Linear Hypotheses:
## Estimate Std. Error z value Pr(>|z|)
## maN_other - maN_adv == 0 2.076 0.485 4.28 <0.001 ***
## maN_RED - maN_adv == 0 -0.312 0.452 -0.69 0.9824
## paN_nom - maN_adv == 0 -1.361 0.430 -3.16 0.0186 *
## paN_RED - maN_adv == 0 1.245 0.492 2.53 0.1114
## paN_res - maN_adv == 0 -3.617 0.555 -6.52 <0.001 ***
## maN_RED - maN_other == 0 -2.388 0.404 -5.91 <0.001 ***
## paN_nom - maN_other == 0 -3.438 0.397 -8.66 <0.001 ***
## paN_RED - maN_other == 0 -0.831 0.432 -1.92 0.3781
## paN_res - maN_other == 0 -5.693 0.546 -10.43 <0.001 ***
## paN_nom - maN_RED == 0 -1.050 0.335 -3.13 0.0205 *
## paN_RED - maN_RED == 0 1.557 0.414 3.77 0.0021 **
## paN_res - maN_RED == 0 -3.305 0.489 -6.76 <0.001 ***
## paN_RED - paN_nom == 0 2.607 0.404 6.45 <0.001 ***
## paN_res - paN_nom == 0 -2.256 0.441 -5.11 <0.001 ***
## paN_res - paN_RED == 0 -4.862 0.549 -8.86 <0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
One of the models to fit to the data is a decision-tree (multiplicative) model: give each prefix a probability of creating a configuration compatible with nasal substitution, and give each consonant a probability of undergoing nas-sub, if it’s in the right configuration. The probability of C+morph combination displaying nas-sub is then the product of those two probabilities (hence, ‘multiplicative’). The problem is that a model like this can only create a pinch at one end, resulting in a “claw” shape rather than a “wug” shape (Dustin Bowers’s term).
Fit the optimal probabilities:
set.seed(1234) #just in case we do anything with random numbers
#Negative log likelihood of the multiplicative (decision-tree) model.
#x[1:6] are the per-prefix probabilities of creating a substitution-eligible
#configuration; x[7:12] are the per-consonant probabilities of undergoing
#substitution. A cell's predicted substitution probability is the product.
#Columns 4 and 5 of nas_sub are taken to be the (token-weighted) substituted
#and unsubstituted counts (cf. subst_count/unsubst_count used elsewhere in
#this script) -- TODO confirm the column positions.
myMultiplicative <- function(x) {
  log_likelihood <- 0
  for (i in 1:6) { #prefixes
    for (j in 1:6) { #consonant groups
      #nas_sub rows are ordered so that row i + 6*(j-1) is prefix i with
      #consonant group j
      cellRow <- i + 6*(j - 1)
      p_subst <- x[i] * x[6 + j] #predicted probability of substitution
      #add this cell's contribution: subst count * log(p) + unsubst count * log(1-p)
      log_likelihood <- log_likelihood +
        nas_sub[cellRow, 4] * log(p_subst) +
        nas_sub[cellRow, 5] * log(1 - p_subst)
    }
  }
  #optim() minimizes by default, so hand back the negative log likelihood
  -log_likelihood
}
#run the optimizer
#(12 starting values of 0.5: the first six are the prefix probabilities, the
#last six the consonant probabilities, matching myMultiplicative above)
myOptimization <- optim(par=c(0.5,0.5,0.5,0.5,0.5,0.5, 0.5,0.5,0.5,0.5,0.5,0.5), fn=myMultiplicative, lower=0.000001, upper=1-0.000001, method="L-BFGS-B") #we have to keep the parameters away from actual 0 and 1 or else undefined log()s will result and an error is thrown
#view the parameters
myOptimization$par
## [1] 1.00000 1.00000 0.94680 1.00000 0.65385 0.26828 1.00000 1.00000
## [9] 0.98453 0.74334 0.21067 0.01372
#get the log likelihood
#(myOptimization$value is the minimized *negative* log likelihood, so negate)
-1*myOptimization$value
## [1] -292.3
Add predicted probabilities as a new column to data-frame:
#Predicted probability for each row = prefix probability * consonant
#probability; row i + 6*(j-1) pairs prefix i with consonant group j, the same
#indexing used inside myMultiplicative
for (i in 1:6) { #loop over prefixes
for(j in 1:6) { #loop over Cs
nas_sub$multiplicative_prediction[i+6*(j-1)] <- myOptimization$par[i] * myOptimization$par[6+j]
}
}
Plot it:
#Plot the multiplicative model's predictions in the same format as the
#observed-rates plot above, for side-by-side comparison.
#NOTE(review): warnings are suppressed here -- presumably from missing
#consonant x morph combinations in nas_sub; confirm nothing else is masked.
par(mar=c(5,5,2,6))
suppressWarnings(with(nas_sub, {
interaction.plot(x.factor=consonant,
trace.factor=morph_unicode,
response=multiplicative_prediction,
xlab="stem-initial consonant",ylab="nas. sub. rate predicted by multiplicative model"
, trace.label="prefix construction", fixed=TRUE, type="b", pch=c(0,1,2,20,18,15))
}))
#Since this will appear in the paper, also do a PNG file:
png(file="Tagalog_decisionTree_model.png",width=myResMultiplier*460,height=myResMultiplier*300,res=myResMultiplier*72,family=myFontFamily)
par(mar=c(5,4,2,0)+0.1)
with(nas_sub, {
interaction.plot(x.factor=consonant,
trace.factor=morph_unicode,
response=multiplicative_prediction,
xlab="stem-initial consonant",ylab="nas. sub. rate predicted by multiplicative model"
, trace.label="prefix construction", fixed=TRUE, type="b", pch=c(0,1,2,20,18,15))
})
dev.off()
## pdf
## 2
To aid reproducibility and record-keeping, we generate an OTSoft input file automatically from this script.
Print first two rows (constraint names):
write(c("\t\t\t*NC\t*[m/n/ng\t*[n/ng\t*[ng\tNasSub\tFaith-maN- other\tFaith-paN-RED-\tFaith-maN-RED-\tFaith-maN- adv\tFaith-paN- noun\tFaith-paN- res","\t\t\t*NC\t*[m/n/ng\t*[n/ng\t*[ng\tNasSub\tFaith-maN- other\tFaith-paN-RED-\tFaith-maN-RED-\tFaith-maN- adv\tFaith-paN- noun\tFaith-paN- res"), file="Tagalog_for_OTSoft.txt",append=FALSE) #overwrite anything there already
Loop through data frame, turning each row into a two-row tableau:
#For each consonant x prefix combination in nas_sub, write a two-row tableau
#(substituted and unsubstituted candidates) with violation marks for the 11
#constraints named in the header rows above.
for(i in 1:length(nas_sub$consonant)) {
#make the input
myInput <- paste(nas_sub$consonant[i], nas_sub$morph[i])
#make the violation vectors for the two candidates
mySubst_violations <- rep(0,11) #initialize to all zeros
myUnsubst_violations <- rep(0,11) #initialize to all zeros
#voiceless consonants violate *NC when unsubstituted
if(nas_sub$consonant[i] %in% c("p","t/s","k")) {
myUnsubst_violations[1] <- 1 #*NC
}
#substituted candidates violate the place-markedness hierarchy for the
#resulting initial nasal
if(nas_sub$consonant[i] %in% c("p","b")) {
mySubst_violations[2] <- 1 #*[m/n/ng
} else if(nas_sub$consonant[i] %in% c("t/s", "d")) {
mySubst_violations[2:3] <- 1 #*[m/n/ng, *[n/ng
} else if(nas_sub$consonant[i] %in% c("k", "g")) {
mySubst_violations[2:4] <- 1 #*[m/n/ng, *[n/ng, *[ng
}
myUnsubst_violations[5] <- 1 #NasSub
#each prefix construction has its own Faith constraint (slots 6-11),
#violated by the substituted candidate
if(nas_sub$morph[i] == "maN- other") {
mySubst_violations[6] <- 1
} else if(nas_sub$morph[i] == "paN-RED-") {
mySubst_violations[7] <- 1
} else if(nas_sub$morph[i] == "maN-RED-") {
mySubst_violations[8] <- 1
} else if(nas_sub$morph[i] == "maN- adv.") {
mySubst_violations[9] <- 1
}else if(nas_sub$morph[i] == "paN- noun") {
mySubst_violations[10] <- 1
}else if(nas_sub$morph[i] == "paN- res.") {
mySubst_violations[11] <- 1
}
#collapse the violation vectors into tab-separated strings
mySubst_violations <- paste(as.character(mySubst_violations),collapse="\t")
myUnsubst_violations <- paste(as.character(myUnsubst_violations),collapse="\t")
#print the two rows
write(c(paste(myInput,"substituted",nas_sub$subst_count[i],mySubst_violations,sep="\t"),paste("","unsubstituted",nas_sub$unsubst_count[i],myUnsubst_violations,sep="\t")),file="Tagalog_for_OTSoft.txt",append=TRUE)
}
#While the above produces an output file that *looks* fine, and that the MaxEnt Grammar Tool has no problem with, OTSoft ignores its last line for some reason. This seems to fix the problem:
#(append a line of bare tabs as a sentinel final line)
write(c("\t\t\t\t\t\t\t\t\t"),file="Tagalog_for_OTSoft.txt",append=TRUE)
##BUT! The line with the tabs has to be removed in order to run Bruce's multifold GLA program, or else it acts as an additional candidate (with no constraint violations, so always wins)
#Hm, sometimes it also has to be removed for MaxEnt grammar tool
#Try this instead:
#write(c(" "),file="Tagalog_for_OTSoft.txt",append=TRUE)
Outside of this script, the file made by the above code, Tagalog_for_OTSoft.txt
, is then used as input for a few constraint models.
The MaxEnt Grammar Tool (http://www.linguistics.ucla.edu/people/hayes/MaxentGrammarTool/) is used, with default settings. In particular, we use the default values of mu and sigma for every constraint (mu=0, sigma=10000). The large sigma means that, in effect, there is no regularization/smoothing/prior — just as close a fit as possible. The output file is named Tagalog_for_OTSoft_MaxEnt_output.txt
.
Read in the MaxEnt output file:
#Read the MaxEnt Grammar Tool's output file into a character vector, one
#element per line. (readLines() opens and closes the connection itself when
#handed a file name.)
maxEntLines <- readLines("Tagalog_for_OTSoft_MaxEnt_output.txt")
Find and parse the lines at end that give probabilities for candidates, adding results to main data frame:
#Find the header row of the candidate-probability section in maxEntLines.
#Returns the line index of the first match; if no line matches, the loop
#falls through and the function returns NULL (invisibly), as before.
getHeaderRowIndex <- function() {
  candidateHeader <- "Input:\tCandidate:\tObserved:\tPredicted:"
  for (lineIndex in seq_along(maxEntLines)) {
    if (maxEntLines[lineIndex] == candidateHeader) {
      return(lineIndex)
    }
  }
}
#Use the function to see where to start the for loop
headerRowIndex <- getHeaderRowIndex()
#Walk the lines after the header; each "substituted" candidate line carries
#the input label ("<consonant> <morph>") and the predicted probability.
#Bug fix: the original range was 'headerRowIndex+1:length(maxEntLines)'.
#Because ':' binds tighter than '+', that evaluates to
#headerRowIndex + (1:length(maxEntLines)) and scans headerRowIndex indices
#past the end of maxEntLines (yielding NA lines that were silently skipped).
#Parenthesizing gives the intended range; cf. the correctly parenthesized
#loop over the constraint-weight section below.
for(i in (headerRowIndex+1):length(maxEntLines)){
#split into columns
lineParts <- strsplit(maxEntLines[i],split="\t")
#Don't bother proceeding further if it's an "unsubstituted" candidate line
#(those have fewer fields because the input-label column is blank)
if(length(lineParts[[1]]) == 4 && lineParts[[1]][2]=="substituted") {
#extract consonant, morphology, and probability of substituted candidate
#(the consonant is everything before the first space, the morph label is
#everything after it)
myC <- strsplit(lineParts[[1]][1],split=" ",fixed=TRUE)[[1]][1]
myMorph <- str_sub(lineParts[[1]][1],str_length(myC)+2)
mySubst_probability <- as.numeric(lineParts[[1]][4])
#add to main data frame
nas_sub$maxentPredictions[nas_sub$consonant==myC & nas_sub$morph==myMorph] <- mySubst_probability
}
}
Plot it:
par(mar=c(5,5,2,6))
suppressWarnings(with(nas_sub, {
interaction.plot(x.factor=consonant,
trace.factor=morph_unicode,
response=maxentPredictions,
xlab="stem-initial consonant",ylab="nas. sub. rate predicted by MaxEnt model"
, trace.label="prefix construction", fixed=TRUE, type="b", pch=c(0,1,2,20,18,15))
}))
#Since this will appear in the paper, also do a PNG file:
#(width/height/res are scaled by myResMultiplier, and family=myFontFamily is passed
#explicitly because par(family=...) does not carry over to png devices -- see the
#preliminaries at the top of the script)
png(file="Tagalog_MaxEnt_model.png",width=myResMultiplier*460,height=myResMultiplier*300,res=myResMultiplier*72,family=myFontFamily)
par(mar=c(5,4,2,0)+0.1)
with(nas_sub, {
interaction.plot(x.factor=consonant,
trace.factor=morph_unicode,
response=maxentPredictions,
xlab="stem-initial consonant",ylab="nas. sub. rate predicted by MaxEnt model"
, trace.label="prefix construction", fixed=TRUE, type="b", pch=c(0,1,2,20,18,15))
})
dev.off()
## pdf
## 2
Get a table of constraint names and weights:
#Build a data frame of constraint names and weights from the MaxEnt output.
maxEntGrammar <- data.frame(constraintName = character(0), constraintWeight = numeric(0))
#Locate the header line that introduces the constraint-weight section
getConstraintHeaderRowIndex <- function() {
  for (lineIdx in seq_along(maxEntLines)) {
    if (maxEntLines[lineIdx] == "|weights| after optimization:") {
      return(lineIdx)
    }
  }
}
constraintHeaderRowIndex <- getConstraintHeaderRowIndex()
#The weight rows sit between that header and the candidate-probability header found earlier
for (rowIdx in (constraintHeaderRowIndex + 1):(headerRowIndex - 1)) {
  fields <- strsplit(maxEntLines[rowIdx], split = "\t")[[1]]
  #first field looks like "ConstraintName (mu=..., sigma=...)": keep the part before the "(" ...
  nameField <- strsplit(fields[1], split = "\\(")[[1]][1]
  #... then drop the single trailing space that preceded the "("
  nameField <- substr(nameField, start = 1, stop = nchar(nameField) - 1)
  maxEntGrammar <- rbind(maxEntGrammar,
                         data.frame(constraintName = nameField, constraintWeight = fields[2]))
}
#Print to console, for pasting into Word document
maxEntGrammar
## constraintName constraintWeight
## 1 *NC 4.850637335679573
## 2 *[m/n/ng 0.0
## 3 *[n/ng 2.1263895994464734
## 4 *[ng 1.1565693854918335
## 5 NasSub 2.310635996134479
## 6 Faith-maN- other 0.0
## 7 Faith-paN-RED- 0.8154354862375875
## 8 Faith-maN-RED- 2.28788653243942
## 9 Faith-maN- adv 1.9200481316624465
## 10 Faith-paN- noun 4.063326210375418
## 11 Faith-paN- res 6.008815758248136
When the article discusses the MaxEnt models, it includes some plots to help explain the sigmoid shape of the MaxEnt grammar’s predictions.
maN- other vs. paN- reservational, for each consonant
#Plot predicted substitution probability (the MaxEnt sigmoid 1/(1+exp(markedness
#difference + UNIF weight))) against markedness difference, for two prefixes,
#and record each consonant's markedness difference for reuse by later plots.
#get UNIF weights
maN_other_weight <- as.numeric(as.character(maxEntGrammar$constraintWeight[maxEntGrammar$constraintName=="Faith-maN- other"]))
paN_res_weight <- as.numeric(as.character(maxEntGrammar$constraintWeight[maxEntGrammar$constraintName=="Faith-paN- res"]))
#plot two curves and a legend
curve(1/(1+exp(x+maN_other_weight)), -10, 10, lty=1, xlab="markedness difference for C", ylab="prob. of substitution",ylim=c(0,1), cex.axis=0.9)
curve(1/(1+exp(x+paN_res_weight)), add=TRUE, lty=3)
#suppressWarnings: presumably silences font warnings for the Unicode legend text -- confirm
suppressWarnings(legend(2, 0.9, c("ma\U014B- other","pa\U014B- reservational"), cex=0.8, lty=c(1,3)))
#get markedness differences for each C (difference in markedness penalties
#between the substituted and unsubstituted candidates)
markedness_differences <- data.frame(consonant=character(0), mark_diff = numeric(0))
for(myC in levels(nas_sub$consonant)) {
#start with unsubst's penalty for NasSub
myMarkednessDifference <- 0 - as.numeric(as.character(maxEntGrammar$constraintWeight[maxEntGrammar$constraintName=="NasSub"]))
#subtract unsubst's penalty for *NC, if applicable
if(myC %in% c("p","t/s","k")) {
myMarkednessDifference <- myMarkednessDifference - as.numeric(as.character(maxEntGrammar$constraintWeight[maxEntGrammar$constraintName=="*NC"]))
}
#add subst's penalty for applicable *[nasal constraints
myMarkednessDifference <- myMarkednessDifference + as.numeric(as.character(maxEntGrammar$constraintWeight[maxEntGrammar$constraintName=="*[m/n/ng"])) #all of them violate this one
if(myC %in% c("t/s","k","d","g")) {
myMarkednessDifference <- myMarkednessDifference + as.numeric(as.character(maxEntGrammar$constraintWeight[maxEntGrammar$constraintName=="*[n/ng"])) #just the coronals and velars
}
if(myC %in% c("k","g")) {
myMarkednessDifference <- myMarkednessDifference + as.numeric(as.character(maxEntGrammar$constraintWeight[maxEntGrammar$constraintName=="*[ng"])) #just the velars
}
#Now draw a line there
abline(v=myMarkednessDifference,col=3,lty=2)
#And add a label
mtext(myC,side=3, at=myMarkednessDifference, line=0.5)
#Record the result in the data frame (used again by the p-vs-k and b-vs-g plots below):
markedness_differences <- rbind(markedness_differences,data.frame(consonant=myC, mark_diff=myMarkednessDifference))
}
#Now the same thing, but written to a file for pasting into paper, and without saving the markedness differences
#(duplicates the on-screen chunk above; note legend() is not wrapped in suppressWarnings here)
png(file="Tagalog_maN_vs_paN.png",width=myResMultiplier*460,height=myResMultiplier*225,res=myResMultiplier*72,family=myFontFamily)
par(mar=c(5,4,2,0)+0.1)
curve(1/(1+exp(x+maN_other_weight)), -10, 10, lty=1, xlab="markedness difference for C", ylab="prob. of substitution",ylim=c(0,1), cex.axis=0.9)
curve(1/(1+exp(x+paN_res_weight)), add=TRUE, lty=3)
legend(2, 0.9, c("ma\U014B- other","pa\U014B- reservational"), cex=0.8, lty=c(1,3))
for(myC in levels(nas_sub$consonant)) {
#rebuild each consonant's markedness difference exactly as in the previous chunk
myMarkednessDifference <- 0 - as.numeric(as.character(maxEntGrammar$constraintWeight[maxEntGrammar$constraintName=="NasSub"]))
if(myC %in% c("p","t/s","k")) {
myMarkednessDifference <- myMarkednessDifference - as.numeric(as.character(maxEntGrammar$constraintWeight[maxEntGrammar$constraintName=="*NC"]))
}
myMarkednessDifference <- myMarkednessDifference + as.numeric(as.character(maxEntGrammar$constraintWeight[maxEntGrammar$constraintName=="*[m/n/ng"]))
if(myC %in% c("t/s","k","d","g")) {
myMarkednessDifference <- myMarkednessDifference + as.numeric(as.character(maxEntGrammar$constraintWeight[maxEntGrammar$constraintName=="*[n/ng"]))
}
if(myC %in% c("k","g")) {
myMarkednessDifference <- myMarkednessDifference + as.numeric(as.character(maxEntGrammar$constraintWeight[maxEntGrammar$constraintName=="*[ng"])) #just the velars
}
abline(v=myMarkednessDifference,col=3,lty=2)
mtext(myC,side=3, at=myMarkednessDifference, line=0.5)
}
dev.off()
## pdf
## 2
Now the converse: p vs. k for different UNIFORMITY weights
#Substitution probability as a function of UNIFORMITY weight, holding the
#consonant's markedness difference fixed (p vs. k); a dashed vertical line
#marks each prefix's fitted UNIF weight.
#a little extra head room for labels
par(mar=c(5,4,5,2))
#plot two curves and a legend
curve(1/(1+exp(markedness_differences$mark_diff[markedness_differences$consonant=="p"] + x)), 0, 15, lty=1, xlab="weight(Uniformity) for each prefix", ylab="prob. of substitution",ylim=c(0,1), cex.axis=0.9)
curve(1/(1+exp(markedness_differences$mark_diff[markedness_differences$consonant=="k"] + x)), add=TRUE, lty=3)
legend(10, 1.0, c("p","k"), cex=0.8, lty=c(1,3))
for(myMorph in levels(nas_sub$morph)) {
#get the unicode version of the morpheme name
myMorphUnicode <- nas_sub$morph_unicode[nas_sub$morph==myMorph][1]
#form the corresponding constraint name
myConstraintName <- paste("Faith-",myMorph,sep="")
#remove final period if necessary (morph levels like "maN- adv." carry a period
#that the constraint names lack)
if(substr(myConstraintName,start=nchar(myConstraintName),stop=nchar(myConstraintName))==".") {
myConstraintName <- substr(myConstraintName,start=0,stop=nchar(myConstraintName)-1)
}
#get UNIF weight
myUnifWeight <- as.numeric(as.character(maxEntGrammar$constraintWeight[maxEntGrammar$constraintName==myConstraintName]))
#Now draw a line there
abline(v=myUnifWeight,col=3,lty=2)
#And add a label
par(las=3) #vertical text
suppressWarnings(mtext(myMorphUnicode, side=3, at=myUnifWeight, line=0.5))
par(las=0) #return to default
}
#Now the same thing, but written to a file for pasting into paper
#(duplicates the on-screen p-vs-k chunk above, with a narrower x range)
png(file="Tagalog_p_vs_k.png",width=myResMultiplier*460,height=myResMultiplier*270, res=myResMultiplier*72, family=myFontFamily)
par(mar=c(5,4,5,0)+0.1)
curve(1/(1+exp(markedness_differences$mark_diff[markedness_differences$consonant=="p"] + x)), 0, 12, lty=1, xlab="weight(Uniformity) for each prefix", ylab="prob. of substitution",ylim=c(0,1), cex.axis=0.9)
curve(1/(1+exp(markedness_differences$mark_diff[markedness_differences$consonant=="k"] + x)), add=TRUE, lty=3)
legend(10, 1.0, c("p","k"), cex=0.8, lty=c(1,3))
for(myMorph in levels(nas_sub$morph)) {
myMorphUnicode <- nas_sub$morph_unicode[nas_sub$morph==myMorph][1]
myConstraintName <- paste("Faith-",myMorph,sep="")
#strip the trailing period from morph names like "maN- adv." to match constraint names
if(substr(myConstraintName,start=nchar(myConstraintName),stop=nchar(myConstraintName))==".") {
myConstraintName <- substr(myConstraintName,start=0,stop=nchar(myConstraintName)-1)
}
myUnifWeight <- as.numeric(as.character(maxEntGrammar$constraintWeight[maxEntGrammar$constraintName==myConstraintName]))
abline(v=myUnifWeight,col=3,lty=2)
par(las=3)
suppressWarnings(mtext(myMorphUnicode,side=3, at=myUnifWeight, line=0.5))
par(las=0)
}
dev.off()
## pdf
## 2
Same thing except b vs. g, to show a different end of the curve–not used in the paper, so plot is not as polished:
#Same as the p-vs-k plot, but for b vs. g (shows the other end of the sigmoid);
#labels use the raw morph names rather than the Unicode versions.
#a little extra head room for labels
par(mar=c(5,4,5,2))
#plot two curves and a legend
curve(1/(1+exp(markedness_differences$mark_diff[markedness_differences$consonant=="b"] + x)), 0, 15, lty=1, xlab="markedness difference for C", ylab="prob. of substitution",ylim=c(0,1), cex.axis=0.9)
curve(1/(1+exp(markedness_differences$mark_diff[markedness_differences$consonant=="g"] + x)), add=TRUE, lty=3)
legend(12, 1.0, c("b","g"), cex=0.8, lty=c(1,3))
for(myMorph in levels(nas_sub$morph)) {
#form the corresponding constraint name
myConstraintName <- paste("Faith-",myMorph,sep="")
#remove final period if necessary
if(substr(myConstraintName,start=nchar(myConstraintName),stop=nchar(myConstraintName))==".") {
myConstraintName <- substr(myConstraintName,start=0,stop=nchar(myConstraintName)-1)
}
#get UNIF weight
myUnifWeight <- as.numeric(as.character(maxEntGrammar$constraintWeight[maxEntGrammar$constraintName==myConstraintName]))
#Now draw a line there
abline(v=myUnifWeight,col=3,lty=2)
#And add a label
par(las=3) #vertical text
mtext(myMorph,side=3, at=myUnifWeight)
par(las=0) #return to default
}
#Now the same thing, but written to a file (this b-vs-g figure is not used in the
#paper, so the PNG keeps default size/resolution rather than the
#myResMultiplier/myFontFamily settings used for the paper's figures)
png(file="Tagalog_b_vs_g.png",width=460,height=225)
par(mar=c(5,4,5,1))
curve(1/(1+exp(markedness_differences$mark_diff[markedness_differences$consonant=="b"] + x)), 0, 15, lty=1, xlab="markedness difference for C", ylab="prob. of substitution",ylim=c(0,1), cex.axis=0.9)
curve(1/(1+exp(markedness_differences$mark_diff[markedness_differences$consonant=="g"] + x)), add=TRUE, lty=3)
legend(12, 1.0, c("b","g"), cex=0.8, lty=c(1,3))
for(myMorph in levels(nas_sub$morph)) {
myConstraintName <- paste("Faith-",myMorph,sep="")
#strip the trailing period from morph names like "maN- adv." to match constraint names
if(substr(myConstraintName,start=nchar(myConstraintName),stop=nchar(myConstraintName))==".") {
myConstraintName <- substr(myConstraintName,start=0,stop=nchar(myConstraintName)-1)
}
myUnifWeight <- as.numeric(as.character(maxEntGrammar$constraintWeight[maxEntGrammar$constraintName==myConstraintName]))
abline(v=myUnifWeight,col=3,lty=2)
par(las=3)
mtext(myMorph,side=3, at=myUnifWeight)
par(las=0)
}
dev.off()
## pdf
## 2
We use OTSoft (http://www.linguistics.ucla.edu/people/hayes/otsoft/) to fit a Noisy Harmonic Grammar model using the Gradual Learning Algorithm. Default options were used everywhere:
The only non-default option is that in the main screen, Options > Sort candidates in tableaux by harmony
is turned off, to make dealing with the OTSoft output, below, easier.
Read in the Noisy HG output file:
#Open the Noisy HG output file, slurp every line into a character vector, then release the connection
conn <- file(description = "Tagalog_for_OTSoft_NHGTabbedOutput.txt", open = "r")
NHGLines <- readLines(con = conn)
close(con = conn)
Find and parse the lines at end that give probabilities for candidates–we need to find the third line that starts with " 1 " :
#Define function to find the starting row: the candidate-probability rows begin at
#the third line whose first tab-delimited field is " 1 " (the first two such lines
#belong to earlier tables in the OTSoft output)
getNHGStartingRowIndex <- function() {
  #counter is local, so the function is self-contained and safe to re-run
  #(it previously read a global counter initialized outside the function)
  howManyTimes1Seen <- 0
  for (i in seq_along(NHGLines)) {
    if (str_sub(NHGLines[i], 1, 4) == " 1 \t") {
      howManyTimes1Seen <- howManyTimes1Seen + 1
      if (howManyTimes1Seen == 3) {
        return(i) #once we hit the third one, return the index
      }
    }
  }
}
#Use the function to see where to start for loop
startingRowIndex <- getNHGStartingRowIndex()
for (i in startingRowIndex:length(NHGLines)) {
  #split into columns
  lineParts <- strsplit(NHGLines[i], split = "\t")
  #extract consonant, morphology, and probability of substituted candidate
  myC <- strsplit(lineParts[[1]][2], split = " ", fixed = TRUE)[[1]][1]
  myMorph <- str_sub(lineParts[[1]][2], str_length(myC) + 2)
  #different lines have candidates in different orders; column 7 holds the
  #first-listed candidate's count out of 1,000,000 simulated evaluations
  if (lineParts[[1]][4] == "unsubstituted") {
    mySubst_probability <- 1 - as.numeric(lineParts[[1]][7]) / 1000000
  } else if (lineParts[[1]][4] == "substituted") {
    mySubst_probability <- as.numeric(lineParts[[1]][7]) / 1000000
  } else {
    #skip unrecognized rows; previously such a row silently reused the
    #probability computed for the preceding line
    next
  }
  #add to main data frame, matching on consonant and morphological category
  nas_sub$NHG_Predictions[nas_sub$consonant == myC & nas_sub$morph == myMorph] <- mySubst_probability
}
Plot it:
#On-screen interaction plot of the NHG model's predicted substitution rates
#(wide right margin for the trace legend; suppressWarnings presumably for the
#Unicode morph labels -- confirm)
par(mar=c(5,5,2,6))
suppressWarnings(with(nas_sub, {
interaction.plot(x.factor=consonant,
trace.factor=morph_unicode,
response=NHG_Predictions,
xlab="stem-initial consonant",ylab="nas. sub. rate predicted by NHG model"
, trace.label="prefix construction", fixed=TRUE, type="b", pch=c(0,1,2,20,18,15))
}))
#Since this will appear in the paper, also do a PNG file:
png(file="Tagalog_NHG_model.png",width=myResMultiplier*460,height=myResMultiplier*300,res=myResMultiplier*72,family=myFontFamily)
par(mar=c(5,4,2,0)+0.1)
with(nas_sub, {
interaction.plot(x.factor=consonant,
trace.factor=morph_unicode,
response=NHG_Predictions,
xlab="stem-initial consonant",ylab="nas. sub. rate predicted by NHG model"
, trace.label="prefix construction", fixed=TRUE, type="b", pch=c(0,1,2,20,18,15))
})
dev.off()
## pdf
## 2
Get a table of constraint names and weights:
#Build a data frame of constraint names and weights from the NHG output.
#Constraint rows occupy the top of the file, ending just before the first
#data row (a row whose first field is " 1 " rather than a constraint name).
NHGGrammar <- data.frame(constraintName = character(0), constraintWeight = numeric(0))
#Find the first row that starts with " 1 "
getNHGConstraintEndingRowIndex <- function() {
  for (lineIdx in seq_along(NHGLines)) {
    if (str_sub(NHGLines[lineIdx], 1, 4) == " 1 \t") {
      return(lineIdx)
    }
  }
}
constraintEndingRowIndex <- getNHGConstraintEndingRowIndex()
#Everything above that row is a constraint line: "name<TAB>weight"
for (rowIdx in seq_len(constraintEndingRowIndex - 1)) {
  fields <- strsplit(NHGLines[rowIdx], split = "\t")[[1]]
  NHGGrammar <- rbind(NHGGrammar,
                      data.frame(constraintName = fields[1], constraintWeight = as.numeric(fields[2])))
}
#Print to console, for pasting into Word document
NHGGrammar
## constraintName constraintWeight
## 1 *NC 10.6079
## 2 *[m/n/ng 0.4193
## 3 *[n/ng 4.3617
## 4 *[ng 2.1659
## 5 NasSub 6.1707
## 6 Faith-maN- other 0.3689
## 7 Faith-paN-RED- 2.1161
## 8 Faith-maN-RED- 5.0054
## 9 Faith-maN- adv 4.3075
## 10 Faith-paN- noun 8.9390
## 11 Faith-paN- res 13.6123
As with MaxEnt, the text of the paper includes an explanation of why NHG creates sigmoids. This plot accompanies that discussion. It is a plot of substitution probability as a function of UNIFORMITY weight, for two different consonants, p and k.
#Extract the fitted NHG weights needed to draw substitution probability as a
#function of UNIFORMITY weight for p vs. k.
NC_weight <- as.numeric(as.character(NHGGrammar$constraintWeight[NHGGrammar$constraintName=="*NC"]))
NasSub_weight <- as.numeric(as.character(NHGGrammar$constraintWeight[NHGGrammar$constraintName=="NasSub"]))
mnng_weight <- as.numeric(as.character(NHGGrammar$constraintWeight[NHGGrammar$constraintName=="*[m/n/ng"]))
nng_weight <- as.numeric(as.character(NHGGrammar$constraintWeight[NHGGrammar$constraintName=="*[n/ng"]))
ng_weight <- as.numeric(as.character(NHGGrammar$constraintWeight[NHGGrammar$constraintName=="*[ng"]))
par(mar=c(5,4,5,2)) #extra room at top for morphology labels
#plot two curves
#NOTE(review): the sd arguments appear to assume each relevant constraint contributes
#noise variance 4, with 4 constraints deciding the p case (sqrt(4*4)) and 6 deciding
#the k case (sqrt(6*4)) -- confirm against the NHG noise settings used in OTSoft
curve(pnorm(NC_weight+NasSub_weight-mnng_weight-x, sd = sqrt(4*4)), 0, 30, lty=1, xlab="UNIF weight", ylab="prob. of substitution",ylim=c(0,1), cex.axis=0.9)
curve(pnorm(NC_weight+NasSub_weight-mnng_weight-nng_weight-ng_weight-x, sd = sqrt(6*4)), add=TRUE, lty=3)
legend("topright", c("p","k"), cex=0.8, lty=c(1,3))
#add the lines for the UNIF weights
for(myMorph in levels(nas_sub$morph)) {
#get the unicode version of the morpheme name
myMorphUnicode <- nas_sub$morph_unicode[nas_sub$morph == myMorph][1]
#form the corresponding constraint name
myConstraintName <- paste("Faith-",myMorph,sep="")
#remove final period if necessary
if(substr(myConstraintName,start=nchar(myConstraintName),stop=nchar(myConstraintName))==".") {
myConstraintName <- substr(myConstraintName,start=0,stop=nchar(myConstraintName)-1)
}
#get UNIF weight
myUnifWeight <- as.numeric(as.character(NHGGrammar$constraintWeight[NHGGrammar$constraintName==myConstraintName]))
#Now draw a line there
abline(v=myUnifWeight,col=3,lty=2)
#And add a label
par(las=3) #vertical text
suppressWarnings(mtext(myMorphUnicode,side=3, at=myUnifWeight, line=0.5))
par(las=0) #return to default
}
#Now the same thing, but written to a file for pasting into paper
png(file="Tagalog_p_vs_k_NHG.png",width=myResMultiplier*460,height=myResMultiplier*270,res=myResMultiplier*72,family=myFontFamily)
par(mar=c(5,4,5,0)+0.1)
curve(pnorm(NC_weight+NasSub_weight-mnng_weight-x, sd = sqrt(4*4)), 0, 25, lty=1, xlab="UNIF weight", ylab="prob. of substitution",ylim=c(0,1), cex.axis=0.9)
curve(pnorm(NC_weight+NasSub_weight-mnng_weight-nng_weight-ng_weight-x, sd = sqrt(6*4)), add=TRUE, lty=3)
legend(20, 1.0, c("p","k"), cex=0.8, lty=c(1,3))
for(myMorph in levels(nas_sub$morph)) {
myMorphUnicode <- nas_sub$morph_unicode[nas_sub$morph == myMorph][1]
myConstraintName <- paste("Faith-",myMorph,sep="")
if(substr(myConstraintName,start=nchar(myConstraintName),stop=nchar(myConstraintName))==".") {
myConstraintName <- substr(myConstraintName,start=0,stop=nchar(myConstraintName)-1)
}
myUnifWeight <- as.numeric(as.character(NHGGrammar$constraintWeight[NHGGrammar$constraintName==myConstraintName]))
abline(v=myUnifWeight,col=3,lty=2)
par(las=3)
suppressWarnings(mtext(myMorphUnicode,side=3, at=myUnifWeight, line=0.5))
par(las=0)
}
dev.off()
## pdf
## 2
The GLA is not deterministic, and results can vary substantially from one run to the next. We used a (non-distributed) version of OTSoft that can run the GLA 10, 100, or 1000 times on the same input file, collating the results in a single file. We can then choose the best-fit model from this set.
We did this three times, twice in an attempt to find the best StOT model, and once to find the best stratal Anttilan model–in each case, the learner was run 1000 times (though, as we’ll see below, only the first 999 runs are examined):
use Magri update rule: no
use Magri update rule: yes
use Magri update rule: no
To examine the results, we read in the collated files:
#Read each collated GLA output into a character vector of lines:
#runs without the Magri update rule...
conn <- file(description = "CollateRuns_Tagalog_NoMagri.txt", open = "r")
collated_NoMagri <- readLines(con = conn)
close(con = conn)
#...runs with the Magri update rule...
conn <- file(description = "CollateRuns_Tagalog_WithMagri.txt", open = "r")
collated_WithMagri <- readLines(con = conn)
close(con = conn)
#...and the stratal-model runs
conn <- file(description = "CollateRuns_Tagalog_Stratal.txt", open = "r")
collated_Stratal <- readLines(con = conn)
close(con = conn)
A function to read in all 1000 runs from a collated file, and find the one with the best log likelihood. (This function is written so that the very last grammar gets ignored; this is equivalent to doing 999 runs rather than 1000. It would have required a bunch of extra programming to get the last one too.)
#Scan a collated OTSoft GLA file and return the run with the best log likelihood.
#  collatedFile: character vector of lines from a CollateRuns file. "G" lines carry
#                constraint name/ranking-value pairs; "O" lines carry candidate
#                frequencies and predicted probabilities; field 2 is the run index.
#  backoff: probabilities of exactly 0 or 1 are nudged inward by this amount before
#           taking logs, so the log likelihood stays finite (default 1/100000).
#Returns c(best_grammar, best_probabilities, best_log_like). Note c() flattens the
#two data frames, so the result is a list: elements 1-2 are the grammar columns,
#3-7 the probability columns, and 8 the log likelihood.
#NOTE: by design, the final run in the file is never assessed (a run is only scored
#when the run index increases past it) -- equivalent to using 999 of the 1000 runs.
find_best_grammar <- function(collatedFile, backoff=1/100000) {
#initialize
best_log_like <- -Inf #the log likelihood to beat; starts out infinitely bad
current_log_like <- 0
best_grammar <- data.frame(constraintName=character(0), rankingValue=numeric(0))
current_grammar <- data.frame(constraintName=character(0), rankingValue=numeric(0))
best_probabilities <- data.frame(input=character(0), output=character(0), observedFreq=numeric(0), predictedProb=numeric(0))
current_probabilities <- data.frame(input_C=character(0), input_morph=character(0), output=character(0), observedFreq=numeric(0), predictedProb=numeric(0))
current_index <- 1
#step through lines of collated file
for(i in 1:length(collatedFile)) {
#parse the line
myLine <- strsplit(collatedFile[i], split="\t")
#if index has gone up, then before going on to the next grammar, it's time to assess the one just finished
if(as.numeric(myLine[[1]][2]) > current_index) {
if(current_log_like > best_log_like) { #this grammar is the new winner
best_log_like <- current_log_like
best_grammar <- current_grammar
best_probabilities <- current_probabilities
}
#either way [i.e., whether latest grammar was new winner or not], start the grammar, probabilities, and log likelihood fresh and update the index
current_grammar <- data.frame(constraintName=character(0), rankingValue=numeric(0))
current_probabilities <- data.frame(input_C=character(0), input_morph=character(0), output=character(0), observedFreq=numeric(0), predictedProb=numeric(0))
current_log_like <- 0
current_index <- as.numeric(myLine[[1]][2])
}
#process current line
if(myLine[[1]][1] == "G") { #if starts with G, add to grammar
current_grammar <- rbind(current_grammar, data.frame(constraintName=myLine[[1]][3], rankingValue=myLine[[1]][4]))
} else if(myLine[[1]][1] == "O" & (myLine[[1]][6]=="unsubstituted" | myLine[[1]][6]=="substituted")) { #if starts with O [that's a capital letter, not a number], add to probabilities, and update log likelihood; don't bother if it's one of those weird lines at the end of each group with a nonexistent output
#split out the word1 and the word2-group
myC <- strsplit(myLine[[1]][4],split=" ",fixed=TRUE)[[1]][1]
myMorph <- str_sub(myLine[[1]][4],str_length(myC)+2)
#add line to data frame
current_probabilities <- rbind(current_probabilities, data.frame(input_C=myC, input_morph=myMorph, output=myLine[[1]][6], observedFreq=myLine[[1]][7], predictedProb=myLine[[1]][8]))
#back off probabilities of exactly 0 or 1 so log() stays finite
myProb <- as.numeric(myLine[[1]][8])
if(myProb == 0) {
myProb <- backoff
} else if (myProb==1) {
myProb <- 1 - backoff
}
#accumulate observed-count-weighted log probability
current_log_like <- current_log_like + as.numeric(myLine[[1]][7])*log(myProb)
}
}
#return best grammar, probabilities, and log likelihood
return(c(best_grammar,best_probabilities,best_log_like))
}
Find the best grammar from each group:
#Select the best-fitting grammar from each collated batch of GLA runs
#(the trailing comments record the log likelihoods obtained)
best_NoMagri <- find_best_grammar(collated_NoMagri) #-367.5775
best_WithMagri <- find_best_grammar(collated_WithMagri) #-314.6419
best_Stratal <- find_best_grammar(collated_Stratal) #-645.725
Print the grammars and their log likelihoods:
#Reassemble the winning grammar into a two-column data frame: elements 1-2 of the
#find_best_grammar() result are the constraintName and rankingValue vectors
#(11 constraints, filled column-wise).
#byrow=FALSE spelled out: T/F are ordinary (reassignable) variables, not keywords.
best_NoMagri_grammar <- data.frame(matrix(unlist(best_NoMagri[1:2]), nrow=11, byrow=FALSE))
colnames(best_NoMagri_grammar) <- names(best_NoMagri)[1:2]
#print for pasting into the paper
best_NoMagri_grammar
## constraintName rankingValue
## 1 *NC 185.34237510571
## 2 *[m/n/ng 21.583468845981
## 3 *[n/ng 180.719324679704
## 4 *[ng 180.922040050001
## 5 NasSub 178.416531154019
## 6 Faith-maN- other -376.180006827324
## 7 Faith-paN-RED- 172.665408206408
## 8 Faith-maN-RED- 178.542294785941
## 9 Faith-maN- adv 177.589151734176
## 10 Faith-paN- noun 183.121009685449
## 11 Faith-paN- res 185.845611261336
best_NoMagri[[8]][1] #log likelihood (element 8 of the flattened return value: 2 grammar columns + 5 probability columns + the log likelihood)
## [1] -367.6
#Reassemble the winning with-Magri grammar (11 constraints) into a data frame.
#byrow=FALSE spelled out: T/F are ordinary (reassignable) variables, not keywords.
best_WithMagri_grammar <- data.frame(matrix(unlist(best_WithMagri[1:2]), nrow=11, byrow=FALSE))
colnames(best_WithMagri_grammar) <- names(best_WithMagri)[1:2]
#print for pasting into the paper
best_WithMagri_grammar
## constraintName rankingValue
## 1 *NC -304.152370006544
## 2 *[m/n/ng -2355.49149032455
## 3 *[n/ng -315.553354844323
## 4 *[ng -308.219712133132
## 5 NasSub -310.720345034178
## 6 Faith-maN- other -314.592178303321
## 7 Faith-paN-RED- -312.7112155541
## 8 Faith-maN-RED- -309.958550786064
## 9 Faith-maN- adv -309.024574806601
## 10 Faith-paN- noun -306.988839272625
## 11 Faith-paN- res -302.216131600666
best_WithMagri[[8]][1] #log likelihood (element 8 of the flattened return value)
## [1] -314.6
#Reassemble the winning stratal grammar (11 constraints) into a data frame.
#byrow=FALSE spelled out: T/F are ordinary (reassignable) variables, not keywords.
best_Stratal_grammar <- data.frame(matrix(unlist(best_Stratal[1:2]), nrow=11, byrow=FALSE))
colnames(best_Stratal_grammar) <- names(best_Stratal)[1:2]
#print for pasting into the paper
best_Stratal_grammar
## constraintName rankingValue
## 1 *NC 2480
## 2 *[m/n/ng -2200
## 3 *[n/ng 2400
## 4 *[ng 2440
## 5 NasSub 2400
## 6 Faith-maN- other -13400
## 7 Faith-paN-RED- 1960
## 8 Faith-maN-RED- 2400
## 9 Faith-maN- adv 2380
## 10 Faith-paN- noun 2480
## 11 Faith-paN- res 2480
best_Stratal[[8]][1] #log likelihood (element 8 of the flattened return value)
## [1] -645.7
Add the probabilities to the main data frame:
#For each row of nas_sub, look up the predicted probability of the "substituted"
#candidate in each best grammar's probability table (matched by consonant and
#morphological category) and store it in a new column.
lookupSubstProb <- function(model, i) {
  hit <- model$input_C == nas_sub$consonant[i] & model$input_morph == nas_sub$morph[i] & model$output == "substituted"
  as.numeric(as.character(model$predictedProb[hit]))
}
for (i in seq_along(nas_sub$consonant)) {
  nas_sub$best_NoMagri[i] <- lookupSubstProb(best_NoMagri, i)
  nas_sub$best_WithMagri[i] <- lookupSubstProb(best_WithMagri, i)
  nas_sub$best_Stratal[i] <- lookupSubstProb(best_Stratal, i)
}
Interaction plots (only WithMagri, since it was better than NoMagri)
#On-screen interaction plot for the best with-Magri StOT model (wide right margin
#for the trace legend; suppressWarnings presumably for the Unicode morph labels -- confirm)
par(mar=c(5,5,2,6))
suppressWarnings(with(nas_sub, {
interaction.plot(x.factor=consonant,
trace.factor=morph_unicode,
response=best_WithMagri,
xlab="stem-initial consonant",ylab="nas. sub. rate, best StOT model"
, trace.label="prefix construction", fixed=TRUE, type="b", pch=c(0,1,2,20,18,15))
}))
#Since this will appear in the paper, also do a PNG file:
#NOTE(review): the file name says "NoMagri" but the response plotted is
#best_WithMagri (the better-fitting model). The name is left unchanged here in
#case the paper references it -- confirm the intended file name.
png(file="Tagalog_StOTNoMagri_model.png",width=myResMultiplier*460,height=myResMultiplier*300, res=myResMultiplier*72,family=myFontFamily)
par(mar=c(5,4,2,0)+0.1)
with(nas_sub, {
interaction.plot(x.factor=consonant,
trace.factor=morph_unicode,
response=best_WithMagri,
xlab="stem-initial consonant",ylab="nas. sub. rate, best StOT model" #fixed missing space in the axis label ("bestStOT")
, trace.label="prefix construction", fixed=TRUE, type="b", pch=c(0,1,2,20,18,15))
})
dev.off()
## pdf
## 2
#On-screen and PNG interaction plots for the best stratal (Anttilan) model
par(mar=c(5,5,2,6))
suppressWarnings(with(nas_sub, {
interaction.plot(x.factor=consonant,
trace.factor=morph_unicode,
response=best_Stratal,
xlab="stem-initial consonant",ylab="nas. sub. rate, best Stratal model"
, trace.label="prefix construction", fixed=TRUE, type="b", pch=c(0,1,2,20,18,15))
}))
#Since this will appear in the paper, also do a PNG file:
png(file="Tagalog_Stratal_model.png",width=myResMultiplier*460,height=myResMultiplier*300, res=myResMultiplier*72,family=myFontFamily)
par(mar=c(5,4,2,0)+0.1)
with(nas_sub, {
interaction.plot(x.factor=consonant,
trace.factor=morph_unicode,
response=best_Stratal,
xlab="stem-initial consonant",ylab="nas. sub. rate, best Stratal model"
, trace.label="prefix construction", fixed=TRUE, type="b", pch=c(0,1,2,20,18,15))
})
dev.off()
## pdf
## 2
Write an OTSoft input file with these constraints
and the Unif constraints as before:
#Write the header: two tab-delimited constraint-name rows (apparently OTSoft's
#input format expects two name rows, full and abbreviated; identical here)
write(c("\t\t\t*N+p\t*N+t/s\t*N+k\t*N+b\t*N+d\t*N+g\tFaith-maN- other\tFaith-paN-RED-\tFaith-maN-RED-\tFaith-maN- adv\tFaith-paN- noun\tFaith-paN- res","\t\t\t*N+p\t*N+t/s\t*N+k\t*N+b\t*N+d\t*N+g\tFaith-maN- other\tFaith-paN-RED-\tFaith-maN-RED-\tFaith-maN- adv\tFaith-paN- noun\tFaith-paN- res"), file="Tagalog_for_OTSoft_Pointwise.txt",append=FALSE) #overwrite anything there already
#Loop through data frame, turning each row into a two-row tableau:
for(i in 1:length(nas_sub$consonant)) {
#make the input
myInput <- paste(nas_sub$consonant[i], nas_sub$morph[i])
#make the violation vectors for the two candidates
mySubst_violations <- rep(0,12) #initialize to all zeros
myUnsubst_violations <- rep(0,12) #initialize to all zeros
#the unsubstituted candidate violates the *N+C constraint for its consonant...
if(nas_sub$consonant[i] == "p") {
myUnsubst_violations[1] <- 1 #*N+p
} else if(nas_sub$consonant[i] == "t/s") {
myUnsubst_violations[2] <- 1 #*N+t/s
} else if(nas_sub$consonant[i] == "k") {
myUnsubst_violations[3] <- 1 #*N+k
} else if(nas_sub$consonant[i] == "b") {
myUnsubst_violations[4] <- 1 #*N+b
} else if(nas_sub$consonant[i] == "d") {
myUnsubst_violations[5] <- 1 #*N+d
} else if(nas_sub$consonant[i] == "g") {
myUnsubst_violations[6] <- 1 #*N+g
}
#...and the substituted candidate violates the Faith constraint for its prefix
if(nas_sub$morph[i] == "maN- other") {
mySubst_violations[7] <- 1
} else if(nas_sub$morph[i] == "paN-RED-") {
mySubst_violations[8] <- 1
} else if(nas_sub$morph[i] == "maN-RED-") {
mySubst_violations[9] <- 1
} else if(nas_sub$morph[i] == "maN- adv.") {
mySubst_violations[10] <- 1
}else if(nas_sub$morph[i] == "paN- noun") {
mySubst_violations[11] <- 1
}else if(nas_sub$morph[i] == "paN- res.") {
mySubst_violations[12] <- 1
}
#collapse each violation vector to a tab-delimited string
mySubst_violations <- paste(as.character(mySubst_violations),collapse="\t")
myUnsubst_violations <- paste(as.character(myUnsubst_violations),collapse="\t")
#print the two rows
write(c(paste(myInput,"substituted",nas_sub$subst_count[i],mySubst_violations,sep="\t"),paste("","unsubstituted",nas_sub$unsubst_count[i],myUnsubst_violations,sep="\t")),file="Tagalog_for_OTSoft_Pointwise.txt",append=TRUE)
}
#Because we're using the multifold GLA tool, I don't think we need the extra line here.
Go outside the program to run OTSoft, then copy the results folder (including CollateRuns.txt, which will be generated outside the folder and so must be moved) to the directory that this script is in. Get the best grammar, using the same functions defined above:
#Read the collated GLA runs for the pointwise (one-*N+C-constraint-per-consonant) models
conn <- file(description = "FilesForTagalog_for_OTSoft_Pointwise_NoMagri/CollateRuns.txt", open = "r")
collated_NoMagri_pointwise <- readLines(con = conn)
close(con = conn)
conn <- file(description = "FilesForTagalog_for_OTSoft_Pointwise_WithMagri/CollateRuns.txt", open = "r")
collated_WithMagri_pointwise <- readLines(con = conn)
close(con = conn)
#Pick the best-fitting grammar out of each batch of runs (log likelihoods noted)
best_NoMagri_pointwise <- find_best_grammar(collated_NoMagri_pointwise) # -269.5477
best_WithMagri_pointwise <- find_best_grammar(collated_WithMagri_pointwise) #-301.2741
#Reassemble the winning pointwise no-Magri grammar (12 constraints) into a data frame.
#byrow=FALSE spelled out: T/F are ordinary (reassignable) variables, not keywords.
best_NoMagri_grammar_pointwise <- data.frame(matrix(unlist(best_NoMagri_pointwise[1:2]), nrow=12, byrow=FALSE))
colnames(best_NoMagri_grammar_pointwise) <- names(best_NoMagri_pointwise)[1:2]
#print for pasting into the paper
best_NoMagri_grammar_pointwise
## constraintName rankingValue
## 1 *N+p 105.414461213438
## 2 *N+t/s 104.207601630031
## 3 *N+k 103.090639156339
## 4 *N+b 100.145366685766
## 5 *N+d 96.1249529465515
## 6 *N+g 91.0427383147581
## 7 Faith-maN- other 95.8667245776422
## 8 Faith-paN-RED- 97.1423608436616
## 9 Faith-maN-RED- 99.9153473148658
## 10 Faith-maN- adv 99.2422484730615
## 11 Faith-paN- noun 102.516614475586
## 12 Faith-paN- res 105.290944368299
best_NoMagri_pointwise[[8]][1] #log likelihood (element 8 of the flattened return value)
## [1] -269.5
#Reassemble the winning pointwise with-Magri grammar (12 constraints) into a data frame.
#byrow=FALSE spelled out: T/F are ordinary (reassignable) variables, not keywords.
best_WithMagri_grammar_pointwise <- data.frame(matrix(unlist(best_WithMagri_pointwise[1:2]), nrow=12, byrow=FALSE))
colnames(best_WithMagri_grammar_pointwise) <- names(best_WithMagri_pointwise)[1:2]
#print for pasting into the paper
best_WithMagri_grammar_pointwise
## constraintName rankingValue
## 1 *N+p -3835.948197276
## 2 *N+t/s -3839.91358991296
## 3 *N+k -3839.64807817019
## 4 *N+b -3843.84019294074
## 5 *N+d -3845.91415551674
## 6 *N+g -3848.24712543703
## 7 Faith-maN- other -3846.77745851803
## 8 Faith-paN-RED- -3845.41540262222
## 9 Faith-maN-RED- -3843.61460930535
## 10 Faith-maN- adv -3843.43178633214
## 11 Faith-paN- noun -3841.84813886068
## 12 Faith-paN- res -3836.39266627683
best_WithMagri_pointwise[[8]][1] #log likelihood (element 8 of the flattened return value)
## [1] -301.3
Add predicted probabilities to data frame:
#For each row of nas_sub, store each pointwise model's predicted probability of
#the "substituted" candidate, matched by consonant and morphological category.
lookupPointwiseSubstProb <- function(model, i) {
  hit <- model$input_C == nas_sub$consonant[i] & model$input_morph == nas_sub$morph[i] & model$output == "substituted"
  as.numeric(as.character(model$predictedProb[hit]))
}
for (i in seq_along(nas_sub$consonant)) {
  nas_sub$best_NoMagri_pointwise[i] <- lookupPointwiseSubstProb(best_NoMagri_pointwise, i)
  nas_sub$best_WithMagri_pointwise[i] <- lookupPointwiseSubstProb(best_WithMagri_pointwise, i)
}
Plot results of best grammar:
#On-screen and PNG interaction plots for the best pointwise model
#(NoMagri, since it fit better than WithMagri in the pointwise case)
par(mar=c(5,5,2,6))
suppressWarnings(with(nas_sub, {
interaction.plot(x.factor=consonant,
trace.factor=morph_unicode,
response=best_NoMagri_pointwise, #since NoMagri fit better in this case
xlab="stem-initial consonant",ylab="nas. sub. rate, best StOT model,\nseparate constraint for each consonant"
, trace.label="prefix construction", fixed=TRUE, type="b", pch=c(0,1,2,20,18,15))
}))
#Since this will appear in the paper, also do a PNG file:
png(file="Tagalog_StOTNoMagri_pointwise_model.png",width=myResMultiplier*460,height=myResMultiplier*300, res=myResMultiplier*72,family=myFontFamily)
par(mar=c(5,5,2,0)+0.1)
with(nas_sub, {
interaction.plot(x.factor=consonant,
trace.factor=morph_unicode,
response=best_NoMagri_pointwise,
xlab="stem-initial consonant",ylab="nas. sub. rate, bestStOT model, \none constraint per consonant" #NOTE(review): "bestStOT" is missing a space -- this label appears in the paper figure; confirm before fixing
, trace.label="prefix construction", fixed=TRUE, type="b", pch=c(0,1,2,20,18,15))
})
dev.off()
## pdf
## 2
Create something that looks like a Hasse diagram, but where the vertical axis (ranking values) is to scale. We want these vertical values:
106 NC̥ 100 [root ŋ 94 UNIF-paŋ-nom. 93 NasSub 89 *[root m/n/ŋ UNIF-maŋ-other
and then lines connecting each level (and two lines coming out of NasSub).
constraint_names <- c("*NC\U0325 (106)","*[ \U014B (100)","U\U0274\U026Aғ-pa\U014B-\U0274\U1D0F\U1D1C\U0274 (94)","N\U1D00sS\U1D1C\U0299 (93)","*[ m/n/\U014B (89)", "U\U0274\U026Aғ-ma\U014B-\U1D0F\U1D1B\U029C\U1D07\U0280 (89)")
x <- c(10,10,10,10,7,13) #horizontal positions
y <- c(106,100,94,93,89,89) #ranking values
vert_increment <- 0.4
plot(x, y, ylab="ranking value", type="n", xaxt="n", xlim=c(5,16), xlab="") #empty plot, no x axis ticks or labels
suppressWarnings(text(x, y, labels = constraint_names)) #add labels at the (x,y) coordinates given
segments(x[1],y[1]-vert_increment,x[2],y[2]+vert_increment) #add line segment from *NC to *[root N
segments(x[2],y[2]-vert_increment,x[3],y[3]+vert_increment) #from *[root N to Unif paN
segments(x[3],y[3]-vert_increment,x[4],y[4]+vert_increment) #from Unif to NasSub
segments(x[4],y[4]-vert_increment,x[5],y[5]+vert_increment) #from NasSub to *[root m/n/N
segments(x[4],y[4]-vert_increment,x[6],y[6]+vert_increment) #from NasSub to Unif maN
#write it to a file
#(family=myFontFamily must be passed explicitly; par(family=...) does not
#carry over to the png device)
png(file="Tagalog_PseudoHasse.png",width=myResMultiplier*300,height=myResMultiplier*460, res=myResMultiplier*72,family=myFontFamily)
par(mar=c(2,4,2,0)+0.1) #slim margins: no x axis, so little space needed at bottom
plot(x, y, ylab="ranking value", type="n", xaxt="n", xlim=c(5,15.5), xlab="") #empty plot, no x axis ticks or labels
#suppressWarnings, as in the on-screen version above: the exotic Unicode glyphs
#in constraint_names can trigger font warnings
suppressWarnings(text(x, y, labels = constraint_names)) #add labels at the (x,y) coordinates given
segments(x[1],y[1]-vert_increment,x[2],y[2]+vert_increment) #add line segment from *NC to *[root N
segments(x[2],y[2]-vert_increment,x[3],y[3]+vert_increment) #from *[root N to Unif paN
segments(x[3],y[3]-vert_increment,x[4],y[4]+vert_increment) #from Unif to NasSub
segments(x[4],y[4]-vert_increment,x[5],y[5]+vert_increment) #from NasSub to *[root m/n/N
segments(x[4],y[4]-vert_increment,x[6],y[6]+vert_increment) #from NasSub to Unif maN
dev.off()
## pdf
## 2
Note that we already retrieved the log likelihood of the multiplicative model above. For reference, it is -1 * myOptimization$value, or `r -1*myOptimization$value`. We also already got them for the best StOT and Stratal models.
Make a function for getting log Likelihood using a column from nas_sub
:
#for NHG models, where we use simulation to get predicted probabilities, we could get some zeroes. Since we run 1,000,000 trials, we back off zeros to 1/1000000 by default.
#Compute the binomial log likelihood of the observed substitution counts under
#the predicted substitution probabilities stored in one column of a data frame.
#
#Args:
#  colName: name of the column holding predicted substitution probabilities.
#  backoff: amount by which probabilities of exactly 0 or 1 are nudged away
#    from the boundary, so log(0) is never taken. Default 1/1000000 matches
#    the 1,000,000 simulated trials used for the NHG predictions.
#  data: data frame containing colName plus subst_count and unsubst_count
#    columns. Defaults to the global nas_sub, so all existing calls behave
#    exactly as before.
#
#Returns: the summed log likelihood (a single number).
getLogLike <- function(colName, backoff = 1/1000000, data = nas_sub) {
  p <- data[[colName]]
  #avoid logs of 0: back off boundary probabilities
  p[p == 0] <- backoff
  p[p == 1] <- 1 - backoff
  #vectorized binomial log likelihood, summed over all rows
  sum(data$subst_count * log(p) + data$unsubst_count * log(1 - p))
}
Now get them all (including ones we already got above, as a check that the function is correct). I’m doing it with the default backoff (1/1000000), appropriate for NHG, where the grammar was checked 1,000,000 times, and also with the backoff used for picking the best StOT and Stratal grammars (1/100,000, since those grammars were checked 100,000 times).
#multiplicative:
getLogLike("multiplicative_prediction") #-292.3079
## [1] -292.3
getLogLike("multiplicative_prediction", backoff=1/100000) #no 0s or 1s, so backoff doesn't matter
## [1] -292.3
# MaxEnt:
getLogLike("maxentPredictions") #-284.8209
## [1] -284.8
#NHG:
getLogLike("NHG_Predictions") #-294.5 (an earlier run gave -293.0787; the NHG predictions come from simulation, so the value shifts slightly across runs)
## [1] -294.5
getLogLike("NHG_Predictions", backoff=1/100000) #-294.5: no 0s or 1s, so backoff doesn't matter
## [1] -294.5
#best StOT (no Magri):
getLogLike("best_NoMagri") #-381.3918
## [1] -381.4
getLogLike("best_NoMagri", backoff=1/100000) #-367.5775
## [1] -367.6
#best StOT (with Magri):
getLogLike("best_WithMagri") #-314.6419
## [1] -314.6
getLogLike("best_WithMagri", backoff=1/100000) #-314.6419: no 0s or 1s, so backoff doesn't matter
## [1] -314.6
#best Stratal
getLogLike("best_Stratal") #-738.9729
## [1] -739
getLogLike("best_Stratal", backoff=1/100000) #-645.725
## [1] -645.7
For comparison, we also include a baseline model, with perfect frequency matching for each combination of prefix construction and stem-initial consonant. It will still not fit the data perfectly: if it predicts substitution 60% of the time for a category that has a 60% substitution rate, it will still sometimes predict substitution when the datum is unsubstituted (0.6 * 0.4), and vice-versa (0.4 * 0.6), for a total error rate of 48%. This provides a ceiling on how well any model (that makes the same distinctions ours do) could do.
To do this, we just run the getLogLike()
function again but for the observed rates:
getLogLike("subst_rate") #-254.6602
## [1] -254.7
getLogLike("subst_rate", backoff=1/100000) #-254.6639
## [1] -254.7