#                             Sagemath Demo
#                         http://www.sagemath.org

1+1

1 + 1.0

# Of 30 graduating students, how many ways are there for 15 to be employed in a job related to their field of study, 10 to be employed in a job unrelated to their field of study, and 5 unemployed?
factorial(30)/(factorial(15)*factorial(10)*factorial(5))

# Charlotte's homework problem
# 4x / (4x^2-8x+7) + 3x / (4x^2-10x+7) = 1
y = 4*x / (4*x^2-8*x+7) + 3*x / (4*x^2-10*x+7)
y 
#
factor(y)

numerator(y) - denominator(y) # Equals zero
#
poly = expand( numerator(y) - denominator(y) ); poly
# 
poly = expand( denominator(y) - numerator(y)); poly # Reverse the sign
factor(poly)

# Solve directly
eq = 4*x/(4*x^2-8*x+7) + 3*x/(4*x^2-10*x+7) == 1
solve(eq,x) # Solve eq for x

# Derivatives
f(x) = exp(4*x)/(1+exp(4*x))
derivative(f(x),x,3) # Derivative with respect to x, 3 times

factor(_)

# Look at f(x) again. Is it a CDF?
f(x)

limit(f(x),x=-Infinity);limit(f(x),x=Infinity)

# The (single) derivative of f is a density
derivative(f(x),x)
# 
f(x).derivative(x) # Another way

# Simplify and save the density
g(x) = factor(f(x).derivative(x)); g(x)

plot(g(x),x,-5,5)

# Is g(x) symmetric around zero?
# If so, then expected value = median = 0.
f(0)

# Symmetry about zero means g(x) = g(-x)
g(x)-g(-x)
 
factor(_)

# (Mult num & denom of g(-x) by e^(8x))

# Replace 4 with theta in f(x)
var('theta')
F(x) = exp(theta*x)/(1+exp(theta*x)); F(x) 

# Is F(x) a distribution function?
limit(F(x),x=-Infinity)

assume(theta>0)
F(x).limit(x=-oo); F(x).limit(x=oo)

# Differentiate to get the density.
# New f(x) will replace the old one. 
f(x) = factor(F(x).derivative(x)); f(x)

# f(x) is symmetric about zero, so the expected value should be zero.
factor(f(x)-f(-x)) # Checking symmetry

# Expected value
integrate(x*f(x),x,-oo,oo)

# Add a location parameter
var('mu')
F(x) = exp(theta*(x-mu))/(1+exp(theta*(x-mu))); F(x)

# Get the density
f(x) = factor(F(x).derivative(x)); f(x)

# Human re-expression is better. Look at density without location parameter.

# Some variables do not have to be declared.
pi



# Is that really the ratio of a circle's circumference to its diameter, or just the Greek letter?
cos(pi)

# That's pretty promising. Evaluate it numerically.
n(pi) # Could also say pi.n() 
n(pi,digits=30)

# Normal density
var('mu, sigma')
assume(sigma>0) 
f(x) = 1/(sigma*sqrt(2*pi)) * exp(-(x-mu)^2/(2*sigma^2)); f(x)

# Integrate the density
integrate(f(x),x,-oo,oo)

simplify(_) # Or factor

# E(X)
integrate(x*f(x),x,-oo,oo)


# Variance is E(X-mu)^2
integrate((x-mu)^2*f(x),x,-oo,oo)

# Weird shit with is mu pos, neg or zero.


# Calculate the moment-generating function and use it to get $E(X^4)$.
# Moment-generating function M(t) = E(e^{Xt})
var('t')
M(t) = integrate(exp(x*t)*f(x),x,-oo,oo); M(t)

M(t) = factor(M(t))

# Differentiate four times, set t=0
derivative(M(t),t,4)(t=0)

# Or do it directly
integrate(x^4*f(x),x,-oo,oo)

factor(_)

# First 10 moments
for j in interval(1,10) : 
    show(derivative(M(t),t,j)(t=0))

# Display LaTeX
for j in interval(1,10) : 
    print(latex(derivative(M(t),t,j)(t=0)))


# In the geometric distribution, a coin with P(head) = theta is tossed repeatedly, and X is the number of tosses required to get the first head.
var('theta')
assume(0<theta); assume(theta<1)
p(x) = theta*(1-theta)^(x-1); p(x)
p(x).sum(x,1,oo)                 # Sum the pmf
(x*p(x)).sum(x,1,oo)             # Expected value
((x-1/theta)^2*p(x)).sum(x,1,oo) # Variance

# Maximum likelihood
# Minus Log Likelihood for univariate normal
# s1 is sum of x, s2 is sum of x^2
var('mu sigma s1 s2 n')
mLL = n*log(sigma) + n/2 * log(2*pi) + 1/(2*sigma^2) * (s2 - 2*mu*s1 + n*mu^2)
mLL

# Differentiate, set derivatives to zero
d1 = derivative(mLL,mu); d2 = derivative(mLL,sigma)
eq = [d1==0,d2==0]; eq

# Extract the second list of solutions
sol1[1]

# This time, get the solutions in the form of a LIST of dictionaries.
# Save item one, the second one. (Indices begin with zero, not one.)
mle = solve(eq,[mu,sigma],solution_dict=True)[1]; mle

# Refer to the elements of a dictionary using the keys.
mle[mu] # MLE of mu

# H will be hessian of MINUS log likelihood
H = identity_matrix(SR,2); H # SR is the Symbolic Ring

# Fill it with mixed partial derivatives
H[0,0] = derivative(mLL,mu,2); H[0,1] = derivative(mLL,[mu,sigma])
H[1,0] = H[0,1] ; H[1,1] = derivative(mLL,sigma,2)
H = factor(H); H

# Evaluate at mle
hmle = factor(H(mle)); hmle

# Function is concave up at critical point iff all eigenvalues > 0 there.
hmle.eigenvalues()

# A bit of linear algebra

var('alpha beta gamma delta')
A = matrix( SR, [[alpha, beta],[gamma, delta]] ); A

A[0,1]

# Note the nice subscripts
var('x11 x12 x13 x21 x22 x23')
B = matrix(SR, [[x11, x12, x13], [x21, x22, x23]])
B

a = 2
a*A

C = A*B; C

show(A)
A.transpose()

A.determinant()

D = C.transpose() # C is 2x3, D is 3x2
E = (C*D).inverse() # Inverse of C*D
factor(E) # E is HUGE! This is not as bad. Factor is a good way to simplify.

A.inverse() # Here is something we can look at without a scrollbar.

Ainverse = factor(_) # Factor the last expression.
Ainverse

Ainverse(alpha=1,gamma=2)

denominator(Ainverse[0,1])

(D.nrows(),D.ncols()) # A tuple. Recall C was 2x3

DC = D*C
DC.rank()

A.eigenvalues() # Returns a list

# ==============================================================================

# Going to sem. First look at that cool factor analysis model from lecture with 2 factors and 3 variables per factor. Can we cross over?

Lambda = matrix(SR,6,2) # Matrix in the Symbolic Ring, 6 rows and 2 columns
Lambda # It's full of zeros

Lambda[0,0] = 1; Lambda[1,0] = var('lambda2'); Lambda[2,0] = var('lambda3')
Lambda[3,1] = var('lambda4'); Lambda[4,1] = var('lambda5'); Lambda[5,1] = 1
Lambda

Phi = matrix(SR,2,2)
Phi[0,0] = var('phi11'); Phi[0,1] = var('phi12')
Phi[1,0] = var('phi12'); Phi[1,1] = var('phi22')
Phi

Omega = matrix(SR,6,6)
for j in range(6): Omega[j,j] = var('omega'+str(j+1))
Omega

Sigma1 = Lambda*Phi*Lambda.transpose() + Omega
Sigma1

# We know we can add covariances between some error terms and still
# maintain identifiability. For example, let Cov(e3,34) = omega34
O2 = copy(Omega) # Work with a copy to avoid changing the original.
O2[2,3] = var('omega34'); O2[3,2] = var('omega34')
O2

Lambda*Phi*Lambda.transpose() + O2

# Would running an arrow from F1 to d4 do the same thing?
Lambda
#
L2 = copy(Lambda)
L2[3,0] = var('lambda7')
L2

Sigma2 = L2*Phi*L2.transpose() + Omega
Sigma2

# Here is some commentary I hope I remember. 
    # First of all, the F1 system is unaffected, and so those parameters can be used to solve for the others.
    # Second, phi12 = sigma16 is still identified too. 
    # Everything in the F2 system is corrupted. This is what stopped me when I was doing it by hand. 
    # Disregarding the omegas of course, need to identify lambda4, lambda5, lambda7 and phi22. That will be 4 equations.
    # lambda5 = sigma45/sigma46. Okay.
    # sigma56 = lambda5 phi22, so phi22 is identified.
    # Still need lambda4 and lambda7. 
    # Now look at sigma24 and sigma34. That's 2 linear equations in 2 unknowns. We can solve for lambda7 and lambda4, EXCEPT if lambda2=lambda3, or if phi12=0. If phi12=0, at least we have lambda7. Can we get lambda4 in that case? This is getting complicated, so

Sigma2(phi12=0)

# Ho Ho! We immediately have lambda7, phi22, lambda4 and lambda5.

# What about the case where lambda2=lambda3? Try this.
Sigma2(lambda2=lambda3)

sigma14 and sigma46 look promising, but what if phi12=0? Already checked that, okay. 

# Have lambda5 from sigma15 right away.
# Then phi22 from sigma56.
# Then lambda4 and lambda7 from sigma14 and sigma46? Get it explicitly.

# Make a list of equations to solve
var('sigma14 sigma46')
eqns = [ sigma14 == Sigma2[0,3], sigma46 == Sigma2[3,5] ]
for eq in eqns: show(eq)

# Solution(s) will be a list of dictionaries
sol = solve(eqns, [lambda4,lambda7], solution_dict=True); sol

# Just to illustrate the nice syntax,
solut = sol[0]
solut[lambda4] # Parameters are the "keys" in the dictionary

# Well, that's one arrow. Could there be four?

# Just to see how it could be set up with the sem package ...
sem = 'http://www.utstat.toronto.edu/brunner/openSEM/sage/sem.sage'
load(sem)
# load('~/sem.sage') # To load a local version in your home directory

Contents()

# Make matrices
LAMBDA = ZeroMatrix(6,2)
LAMBDA[0,0] = 1; LAMBDA[1,0] = var('lambda2'); LAMBDA[2,0] = var('lambda3')
LAMBDA[3,1] = var('lambda4'); LAMBDA[4,1] = var('lambda5'); LAMBDA[5,1] = 1
LAMBDA
# 
PHI = SymmetricMatrix(2,'phi'); PHI
OMEGA = DiagonalMatrix(6,'omega'); OMEGA

# ----------------------------------------------------------------------

SIGMA = FactorAnalysisCov(LAMBDA,PHI,OMEGA); SIGMA

# Covariance structure equations
eqns = SetupEqns(SIGMA) # A list
for eq in eqns: show(eq)

# To solve equations for parametrers, need a list of parameters
params = Parameters(LAMBDA); params
params.extend(Parameters(PHI)); params
params.extend(Parameters(OMEGA)); params
len(params); len(eqns)

# Try to solve. This is not going to work.
solve(eqns,params)

# [] is an empty list. It means there are no general solutions. 
# It happens because of the equality constraints. As far as Sagemath knows, 
# the sigma_ij are just numbers, and there is no reason that generic numbers 
# would obey those constraints.
# Need the same number of equations as unknowns: 13 (drop 8 equations). 

# Look at the equations again, as tuples.
k = len(eqns)
for index in range(k): index,eqns[index]

# Keep all diagonal elements of Sigma
keep = [0,6,11,15,18,20]
# Equations 1,2 and 7 will identify lambda2, lambda3, phi11
keep.extend([1,2,3,4,5,7]); len(keep)
# I think we're missing phi22
# Syntax: append items, extend lists
keep.append(17); keep  

# New list of equations to solve
keep.sort() # Sort them (change in place)
eqns2 = []
for index in keep: 
    show(eqns[index])
    eqns2.append(eqns[index])

# Solve, returning solutions as a list of dictionaries
sol = solve(eqns2,params,solution_dict=True)
len(sol) # Should have one item (unique solution)

sol = sol[0] # Just the first dictionary
sol[omega4]

# Look at all the solutions. Dictionaries are unordered, so,
for p in params: show(p == sol[p])

# One might think we need phi12 ne 0, but you know it's not true. 
# Selecting equations to solve is tough. Try Groebner basis.
GroebnerBasis?

# Get polynomials
p = SetupEqns(SIGMA, poly=True)
for item in p: show(item)

# Parameters must include sigma_ij, which come last
par2 = copy(params)
par2.extend(Parameters(SymmetricMatrix(6,'sigma')))
par2

gb = GroebnerBasis(p,par2)
for item in gb: show(item)


# Try for a better ordering of variables. I care least about the omegas.
# So put them first.
par3 = Parameters(OMEGA)
par3.extend(Parameters(PHI))
par3.extend(Parameters(LAMBDA))
par3.extend(Parameters(SymmetricMatrix(6,'sigma')))
par3

gb = GroebnerBasis(p,par3)
for item in gb: show(item)

-------------------------------------------------

# Try the bifactor model
load('~/sem.sage')
Contents()

# Parameter matrices
LAMBDA = ZeroMatrix(9,4)
for j in range(9): LAMBDA[j,0] = var('lambda'+str(j+1))
LAMBDA
#
LAMBDA[0,1] = var('lambda10'); LAMBDA[1,1] = var('lambda11')
LAMBDA[2,1] = var('lambda12'); LAMBDA
LAMBDA[3,2] = var('lambda13'); LAMBDA[4,2] = var('lambda14')
LAMBDA[5,2] = var('lambda15'); LAMBDA
LAMBDA[6,3] = var('lambda16'); LAMBDA[7,3] = var('lambda17')
LAMBDA[8,3] = var('lambda18'); LAMBDA
#
OMEGA = DiagonalMatrix(9,'omega')
PHI = IdentityMatrix(4)
SIGMA = FactorAnalysisCov(LAMBDA,PHI,OMEGA); SIGMA

# Try a Groebner basis
p = SetupEqns(SIGMA, poly=True)
param = Parameters(OMEGA)
param.extend(Parameters(LAMBDA)); show(param)
param.extend(Parameters(SymmetricMatrix(9,'sigma')))

# gb = GroebnerBasis(p,param)
# Leaving it running overnight ... No.

# Try eliminating the diagonal
p2 = SetupEqns(SIGMA, poly=True, corr=True)
for item in p2: item
#
param2 = Parameters(LAMBDA); param2
#
sigij = Parameters(SymmetricMatrix(9,'sigma',corr=True)); sigij
#
param2.extend(sigij); param2

# Now a Groebner basis, with fewer polynomials
gb2 = GroebnerBasis(p2,param2) 

# It finished within a few minutes -- don't know exactly how long.

# Take a look
# for item in gb2: item  # WARNING: Output truncated!
len(gb2)

for j in range(100): gb2[j] # All equality constraints

for j in interval(100,200): gb2[j]

# Well, only 8 more equality constraints, then a polynominal with lambda18^2. This reminds me that there are two (multiple) solutions unless a sign is fixed for each factor ... 
# Also, I need a different order, so I see solutions for smaller index numbers first.


-------------------------------------------------

# I Modified sem.sage SetupEqns on Depth Charge! No CovCheck now.


# Bifactor Two

load('~/sem.sage')
# Contents()

# Parameter matrices
LAMBDA = ZeroMatrix(9,4)
for j in range(9): LAMBDA[j,0] = var('lambda'+str(j+1))
LAMBDA
LAMBDA[0,1] = var('lambda10'); LAMBDA[1,1] = var('lambda11')
LAMBDA[2,1] = var('lambda12'); LAMBDA
LAMBDA[3,2] = var('lambda13'); LAMBDA[4,2] = var('lambda14')
LAMBDA[5,2] = var('lambda15'); LAMBDA
LAMBDA[6,3] = var('lambda16'); LAMBDA[7,3] = var('lambda17')
LAMBDA[8,3] = var('lambda18'); LAMBDA
OMEGA = DiagonalMatrix(9,'omega')
PHI = IdentityMatrix(4)
SIGMA = FactorAnalysisCov(LAMBDA,PHI,OMEGA); SIGMA


# Eliminating the diagonal, and assembling list of parameters 
# in reverse order of interest.
p = SetupEqns(SIGMA, poly=True, corr=True)
for item in p: item
#



param = Parameters(LAMBDA); param

# The order is somewhat fortunate.  I am interested in lambda1 
# most and lambda10 next.
param.reverse(); param

# Want equality restrictions involving lower numbered sigma_ij to come first.
sigij = Parameters(SymmetricMatrix(9,'sigma',corr=True))
sigij.reverse(); sigij

# Put the full list together
param.extend(sigij); param

# Now a Groebner basis. Had around 1900 before ...
gb2 = GroebnerBasis(p,param)

# It finished within a few minutes -- don't know exactly how long.
# Take a look
len(gb2) 

for j in range(110): gb2[j] 

for j in interval(107,120): gb2[j]


# Very nice. Look near the top of this last part.

show(gb2[108]) # Solution for lambda1
show(gb2[117]) # Solution for lambda10

# Tried to reduce the set of equality constraints by calculating a gb
# of them, but the set of polynomials was almost unchanged.
# Do it by eye.
sigpoly = gb2[0:106]
for j in range(len(sigpoly)): j, sigpoly[j]

#         0,  2,  5,  9, 14, 16, 20, 26, 34, 44, 47,  53, 63, 74, 91  That's 15
# sigma  14  15  16  17  18  24  25  26  27  28  37   38  47  48  57

# Go back and see 
# 10, 16, 50
# 19  26  29

# For 18 in all. That's all I can find. List them.

# List equality constraints
keep = [0,  2,  5,  9, 14, 16, 20, 26, 34, 44, 47,  53, 63, 74, 91, 10, 16, 50]
keep.sort(); keep

constraints = []
for j in keep: 
    con = sigpoly[j]
    show(con)
    constraints.append(con)

len(constraints)

# Identification residuals -- get a z for each one with the delta method.

------------------------------------------------- 

# PC as a substitute for CFA

# sem = 'http://www.utstat.toronto.edu/brunner/openSEM/sage/sem.sage'
# load(sem)
load('~/sem.sage') # To load a local version in your home directory

# Create model matrices
L = ZeroMatrix(3,1) # Lambda
L[0,0] = var('lambda1'); L[1,0] = var('lambda2'); L[2,0] = var('lambda3'); L
O = IdentityMatrix(3) # Omega
O[0,0] = 1-lambda1^2; O[1,1] = 1-lambda2^2; O[2,2] = 1-lambda3^2; O
Sigma = FactorAnalysisCov(L,IdentityMatrix(1),O); Sigma

# assume(lambda1 > 0); assume(lambda2 > 0); assume(lambda3 > 0) # Not enough
# assume(lambda1, "real"); assume(lambda2, "real"); assume(lambda3, "real") # No
eigval = Sigma.eigenvalues() # Compare communality = lambda1^2 + lambda2^2 + lambda3^2
for item in eigval: show(factor(item)) # Has imaginary part

# Try again
for item in eigval:
    n = Simplify(numerator(item))  # Expand, then factor
    d = Simplify(denominator(item))
    eigen = factor(n/d)
    show(eigen) 

# It is still awful. 

# With lambda1 = 0.9; lambda2 = 0.7; lambda3 = 0.5, get eigenvalues
# 1.9632830 0.6794930 0.3572239 from R
for j in range(3): show( eigval[j](lambda1 = 0.9, lambda2 = 0.7, lambda3 = 0.5).n() )

# Well, those imaginary parts are pretty small. 

# The eigenvectors are sure to be a mess, but just take a look.
EIG = Sigma.eigenvectors_right()
show(EIG[0])

# Returns a list of triples, one per eigenvalue:
    # e: the eigenvalue
    # V: list of vectors, basis for eigenspace
    # n: algebraic multiplicity
# My conclusion is that sagemath cannot do it.



# Using R within sagemath is possible for some tasks, but inconvenient.
# For Jupyter notebooks, it's a work in progress.
# So here's a pure R session


rm(list=ls())
lambda1 = 0.9; lambda2 = 0.7; lambda3 = 0.5
Lambda = c(lambda1, lambda2, lambda3); Lambda
sum(Lambda^2) # True explained variance
# Proportions of explained variance
Lambda^2/3
# Proportion of total variance explained
sum(Lambda^2)/3

Lambda = matrix(Lambda); Lambda
Sigma = Lambda %*% t(Lambda); Sigma
diag(Sigma) = c(1,1,1); Sigma
lambda1*lambda2 # Checking element (1,2)

ev = eigen(Sigma); ev
# Eigenvalues are 1.9632830 0.6794930 0.3572239

# Total variance explained by the factor is
# sum(Lambda^2) # 1.55

# Correlations between observed variable z and F are the factor loadings Lambda
# Correlations between z and the principal components ... 

D = diag(ev$values); D
C = ev$vectors; C
C %*% D %*% t(C) # Sigma

# Correlations between z and the principal components
corrZpc = C %*%sqrt(D); corrZpc

# Can always switch the sign (think A x = lambda x)
C = -C
# Correlations between z and the principal components again
corrZpc = C %*%sqrt(D); corrZpc

# Correlations between the principal components and the factor
solve(sqrt(D)) %*% t(C) %*% Lambda

# That's actually pretty good. Compare 0.88 to the correlation of the sum with the factor.
(lambda1+lambda2+lambda3)/sqrt(sum(Sigma)) # .87

# This is consistent with what I heard when I was a psych grad student. Don't bother with principal components; just use the sum. I would add standardize first, of course, unless they the items or tests involved are already on the same scale.

--------------------------------

--------------------------------


############################################
# Now simulate data to cross-check. Restart.
############################################

rm(list=ls()); set.seed(9999)
lambda1 = 0.9; lambda2 = 0.7; lambda3 = 0.5
omega1 = 1-lambda1^2; omega2 = 1-lambda2^2; omega3 = 1-lambda3^2
n = 10000

# Generate the data
F = rnorm(n); e1 = rnorm(n,0,sqrt(omega1))
e2 = rnorm(n,0,sqrt(omega2)); e3 = rnorm(n,0,sqrt(omega3))
z1 = lambda1 * F + e1; z2 = lambda2 * F + e2; z3 = lambda3 * F + e3
dat = cbind(z1,z2,z3)

# Extract just the first principal component. See Chapter 2.
pc1 = as.numeric(prcomp(dat, scale = T, rank = 1)$x) # Comes as a matrix
pc1 = -pc1 # As before
# The competitor is just the sum
sumz = z1+z2+z3
cor(F,cbind(pc1,sumz)) # Compare 0.8788727, 0.867502

--------------------------------

           pc1      sumz
[1,] 0.8788233 0.8662813

--------------------------------

########################################################################################
# I am inclined to generalize, but I think I will try a lot of different factor loadings. Simulate lambda_j from independent uniforms. Restart.
########################################################################################


rm(list=ls()); set.seed(9999); nsim = 100000 # One hundred thousand
results = matrix(NA,nsim,5)
colnames(results) = c('lambda1', 'lambda2', 'lambda3', 'corrFpc', 'corrFsum')

for(j in 1:nsim)
{
lambda1 = runif(1); lambda2 = runif(1); lambda3 = runif(1)
Lambda = matrix(c(lambda1, lambda2, lambda3))
omega1 = 1-lambda1^2; omega2 = 1-lambda2^2; omega3 = 1-lambda3^2
Omega = diag(c(omega1,omega2,omega3))
Sigma = Lambda %*% t(Lambda) + Omega; Sigma
ev = eigen(Sigma)
D = diag(ev$values)
C = ev$vectors
# Correlation between the first principal component and the factor
corr1 = abs( (solve(sqrt(D)) %*% t(C) %*% Lambda)[1,1] )
# Correlation between the sum and the factor. Absolute value.
corr2 = (lambda1+lambda2+lambda3)/sqrt(sum(Sigma))
# Record results
results[j,] = abs( c(lambda1,lambda2,lambda3,corr1,corr2) )
} # Next simulation
diff = results[,4]-results[,5] # Advantage of pc
results = cbind(results,diff)
corrFpc = results[,4]; corrFsum = results[,5]

table(diff>0) # True or false
# hist(diff, breaks = 'fd')
hist(diff, breaks = 50)

# plot(corrFpc,corrFsum,pch=20)
plot(corrFpc,corrFsum,pch='.')









-----------------------------------------------