############################################
#
#  MATERIAL COBERTO NESSE TUTORIAL
#
############################################
#
# 1) INSTALANDO E CARREGANDO UM PACOTE
# 2) PROCURANDO, INCLUINDO, EXCLUINDO OBJETOS
# 3) CHAMANDO FUNCOES
# 4) R COMO UMA CALCULADORA
# 5) VECTORS
# 6) PADROES DE VETORES
# 7) OPERACOES MATRICIAIS
# 8) MODE OF A VECTOR
# 9) FAZENDO LISTAS
# 10) COMPARACOES LOGICAS
# 11) GERACAO DE NUMEROS ALEATORIOS
# 12) CONTROLE DE FLUXO
# 13) ESCREVENDO FUNCOES EM R
# 14) QUEM E' MAIS RAPIDO?
# 15) VETORIZANDO O CALCULO
# 16) EXPLORATORY DATA ANALYSIS
# 17) SALVANDO UM GRAFICO
# 18) ESTATISTICAS BASICAS
#
############################################


############################################
# 1) INSTALANDO E CARREGANDO UM PACOTE
############################################

# This small data set was taken from Stock and 
# Watson (2007) and provides information on the 
# number of library subscriptions to economic 
# journals in the United States of America in 
# the year 2000. The data set is available in 
# package AER under the name Journals.

# Install AER only when it is missing, so that re-running the script does
# not download and reinstall the package every time.
if (!requireNamespace("AER", quietly = TRUE)) {
  install.packages("AER")
}

# Attach the package and load the Journals data set that ships with it.
library("AER")

data("Journals", package = "AER")

# Dimensions and column names of the data set.
dim(Journals)

names(Journals)

# Build the regression variables by referring to the columns explicitly
# instead of attach()-ing the data frame: an attach() that is never followed
# by detach() leaves the data on the search path for the rest of the session.
y <- log(Journals$subs)

x <- log(Journals$price / Journals$citations)

# Scatter plot of log subscriptions against log price per citation.
# y and x are workspace objects, so no `data` argument is needed.
plot(y ~ x, xlab = "Log price/citations", ylab = "Log subscriptions")

# Simple linear regression of y on x; the fitted line is drawn on top of the
# scatter plot using colour 2 of the current palette and double line width.
reg <- lm(y ~ x)

abline(reg, col = 2, lwd = 2)

summary(reg)

# List the objects now present in the workspace.
objects()


##############################################
# 2) PROCURANDO, INCLUINDO, EXCLUINDO OBJETOS
##############################################
# Show the search path: the environments and packages R looks through when
# resolving a name.
search()

# Bind the name x to the value 2 (replacing any x already in the workspace).
x <- 2

# x is now listed among the workspace objects ...
ls()

# ... delete it ...
rm(x)

# ... and check that it is no longer listed.
ls()


############################################
# 3) CHAMANDO FUNCOES
############################################

# The four calls below all compute the base-2 logarithm of 16. They differ
# only in how the arguments are matched to the parameters `x` and `base`.

# Both arguments matched by position.
log(16, 2)

# `x` matched by name; the remaining unnamed argument is matched by position.
log(x = 16, 2)

# First argument matched by position, `base` matched by name.
log(16, base = 2)

# Both arguments matched by name, so their order does not matter.
log(base = 2, x = 16)


############################################
# 4) R COMO UMA CALCULADORA
############################################

# Expressions typed at top level are evaluated and their value is printed.

# Addition.
2+2


# Exponentiation.
2^3

# Logarithm of 3; no base is given, so log() uses its default base.
log(3)

# Sine of pi/2 (the argument is in radians).
sin(pi/2)

# Built-in constant pi.
pi

# The exponential function evaluated at 1, i.e. Euler's number e.
exp(1)


############################################
# 5) VECTORS
############################################

# Combine five numbers into a numeric vector.
x <- c(1.8, 3.14, 4, 88.169, 13)

# Number of elements.
length(x)

# Single element, selected by position (indices start at 1).
x[2]

# A contiguous range of positions.
x[2:3]

# An arbitrary set of positions.
x[c(1, 3, 5)]

# Negative indices drop the given positions and keep the rest.
x[-c(2, 3, 5)]


# Arithmetic is applied element by element; scalars are recycled.
2 * x + 3

# Element-wise arithmetic between vectors of the same length.
5:1 * x + 1:5

# Mathematical functions are applied element by element as well.
log(x)

############################################
# 6) PADROES DE VETORES
############################################

# The value 1 repeated ten times.
ones <- rep(1, times = 10)

# The even numbers 2, 4, ..., 20.
even <- seq(2, 20, by = 2)

# Consecutive integers from 1981 to 2005.
trend <- 1981:2005

# Concatenate two vectors into one.
c(ones, even)


############################################
# 7) OPERACOES MATRICIAIS
############################################

# Fill a matrix with two rows column by column from the values 1..6.
A <- matrix(1:6, nrow = 2)

# Transpose.
t(A)

# Dimensions: both at once, then rows and columns separately.
dim(A)

nrow(A)

ncol(A)

# Sub-matrix made of rows 1-2 and columns 1 and 3 of A.
A1 <- A[1:2, c(1, 3)]

A1

# Matrix inversion
solve(A1)

# Multiplying a matrix by its inverse with the matrix product %*%.
A1 %*% solve(A1)

# Prepend a column; the scalar 1 is recycled to the number of rows.
cbind(1, A1)

# diag() with a single number builds an identity matrix of that size.
diag(4)

# With two arguments: the value 3 on the diagonal of a matrix with 4 rows.
diag(3, 4)

# Stack A1 on top of a matrix with 2 rows and 4 on its diagonal.
rbind(A1, diag(4, 2))


############################################
# 8) MODE OF A VECTOR
############################################
# Start again from a plain numeric vector.
x <- c(1.8, 3.14, 4, 88.169, 13)

# Storage mode of the vector.
mode(x)

# A comparison yields a logical vector of the same length.
x > 3.5

# Attach a name to every element, spelling the names out ...
names(x) <- c("a", "b", "c", "d", "e")

x

# ... or taking them from the built-in lower-case alphabet ...
names(x) <- letters[1:5]

x

# ... or from the built-in upper-case alphabet.
names(x) <- LETTERS[1:5]

x

# The same three elements selected by position and by name.
x[3:5]

x[c("C", "D", "E")]

# Selection with a logical vector keeps the elements where it is TRUE.
x[x > 3.5]

############################################
# 9) FAZENDO LISTAS
############################################
# A list can hold components of different types: here a numeric sample,
# a character string and a nested list of parameters.
mylist <- list(
  sample = rnorm(5),
  family = "normal distribution",
  parameters = list(mean = 0, sd = 1)
)

mylist

# Three equivalent ways of extracting the first component:
# by position, by name with [[ ]], and by name with $.
mylist[[1]]

mylist[["sample"]]

mylist$sample

# Extractors can be chained to reach into the nested list.
mylist[[3]]$sd


############################################
# 10) COMPARACOES LOGICAS
############################################

# Reset x to the unnamed numeric vector.
x <- c(1.8, 3.14, 4, 88.169, 13)

# Element-wise AND of two comparisons.
x > 3 & x <= 4

# Positions at which the condition is TRUE.
which(x > 3 & x <= 4)


# Is the condition TRUE for every element / for at least one element?
all(x > 3)

any(x > 3)

# Exact equality on floating-point results: the two differences below are
# both mathematically 1, but the second generally does not compare equal
# to 1 because 1.9 and 0.9 have no exact binary representation.
(1.5 - 0.5) == 1

(1.9 - 0.9) == 1


# Coercion

# Type predicates.
is.numeric(x)

is.character(x)

# Explicit conversion to character.
as.character(x)

# Mixing a number and a string in c() coerces both to character.
c(1, "a")


############################################
# 11) GERACAO DE NUMEROS ALEATORIOS
############################################

# Fix the state of the random number generator so the draws below are
# reproducible.
set.seed(123)

# Two standard-normal draws ...
rnorm(2)

# ... and two more, which continue the random stream.
rnorm(2)

# Resetting the seed to the same value restarts the stream ...
set.seed(123)

# ... so this call reproduces the first pair of draws.
rnorm(2)

# Random permutation of the integers 1 to 5.
sample(1:5)

# Five draws with replacement from two labels, with unequal probabilities
# (0.2 for "male", 0.8 for "female").
sample(c("male", "female"), size = 5, replace = TRUE,prob = c(0.2, 0.8))


############################################
# 12) CONTROLE DE FLUXO
############################################

# Reset x to the unnamed numeric vector.
x <- c(1.8, 3.14, 4, 88.169, 13)

# if/else takes a single condition: depending on the sign of one random
# draw, report either the sum or the mean of x.
if (rnorm(1) > 0) {
  sum(x)
} else {
  mean(x)
}


# ifelse() is the vectorised counterpart: square root where the element
# exceeds 4, square otherwise.
ifelse(x > 4, sqrt(x), x^2)


# for loop: from the second element on, replace each element by its
# difference from the (already updated) previous element.
for (i in 2:5) {
  x[i] <- x[i] - x[i - 1]
}

# Everything except the first element.
x[-1]


# while loop: keep doubling x until its sum reaches at least 100.
while (sum(x) < 100) {
  x <- 2 * x
}

x


############################################
# 13) ESCREVENDO FUNCOES EM R
############################################

# Column means of a matrix, computed with explicit nested loops.
#
# This is a deliberately naive implementation, written out element by
# element so that it can be compared with the built-in colMeans().
#
# Args:
#   X: a numeric matrix.
#
# Returns:
#   A numeric vector with one mean per column of X. A matrix without columns
#   gives numeric(0); a matrix without rows gives NaN for every column.
cmeans <- function(X) {
  n_rows <- nrow(X)
  rval <- numeric(ncol(X))
  # seq_len() yields an empty sequence for an empty dimension, whereas
  # 1:0 would count down (1, 0) and index outside the matrix.
  for (j in seq_len(ncol(X))) {
    mysum <- 0
    for (i in seq_len(n_rows)) {
      mysum <- mysum + X[i, j]
    }
    rval[j] <- mysum / n_rows
  }
  rval
}


# A 10 x 2 test matrix filled column by column with 1..20.
X <- matrix(1:20, nrow = 10, ncol = 2)

# The hand-written function and the built-in one applied to the same input.
cmeans(X)

colMeans(X)

############################################
# 14) QUEM E' MAIS RAPIDO?
############################################

# A large two-column matrix of two million standard-normal draws.
X <- matrix(rnorm(2e6), ncol = 2)

# Time the built-in column means ...
system.time(colMeans(X))

# ... and the hand-written double loop on the same matrix.
system.time(cmeans(X))


############################################
# 15) VETORIZANDO O CALCULO
############################################
# Column means of a matrix, with the inner loop replaced by mean().
#
# Only the loop over columns is written out; the work inside each column is
# delegated to the vectorised mean().
#
# Args:
#   X: a numeric matrix.
#
# Returns:
#   A numeric vector with one mean per column of X; numeric(0) when X has no
#   columns.
cmeans2 <- function(X) {
  rval <- numeric(ncol(X))
  # seq_len() yields an empty sequence for a matrix without columns, whereas
  # 1:0 would count down (1, 0) and index outside the matrix.
  for (j in seq_len(ncol(X))) {
    rval[j] <- mean(X[, j])
  }
  rval
}

# Time the version that loops over columns only.
system.time(cmeans2(X))


# apply() over the second margin (columns) calls mean() once per column.
apply(X, MARGIN = 2, FUN = mean)

# Time the apply() version on the same matrix.
system.time(apply(X, MARGIN = 2, FUN = mean))


############################################
# 16) EXPLORATORY DATA ANALYSIS
############################################

# Reload the Journals data, naming the package explicitly so the call does
# not depend on the current search path.
data("Journals", package = "AER")

# Add the price per citation as a new column.
Journals$citeprice <- Journals$price / Journals$citations

# Evaluate the plotting calls inside the data frame with with() rather than
# attach()/detach(): nothing is left on the search path, even if one of the
# calls fails, and no previously attached copy of the data can mask columns.
with(Journals, {
  # Scatter plot with log subscriptions on the horizontal axis ...
  plot(log(subs), log(citeprice))
  # ... and rugs marking the marginal distribution on each axis
  # (default side for log(subs), side = 2 for log(citeprice)).
  rug(log(subs))
  rug(log(citeprice), side = 2)
})


# Formula interface: response ~ regressor, with columns looked up in `data`.
plot(log(subs) ~ log(citeprice), data = Journals)


# The same plot with a custom plotting symbol, colour, axis limits and title.
plot(log(subs) ~ log(citeprice), data = Journals, pch = 20,
     col = "blue", ylim = c(0, 8), xlim = c(-7, 4),
     main = "Library subscriptions")


############################################
# 17) SALVANDO UM GRAFICO
############################################
# Open a PDF graphics device writing to myfile.pdf (6 wide, 5 high), draw
# the first twenty plotting symbols and colours at double size, then close
# the device so the file is written out.
pdf(file = "myfile.pdf", width = 6, height = 5)
plot(1:20, cex = 2, col = 1:20, pch = 1:20)
dev.off()


############################################
# 18) BASIC STATISTICS
############################################

# Load the CPS1985 data set, naming the package explicitly so the call does
# not depend on the current search path, and take a first look at it.
data("CPS1985", package = "AER")

str(CPS1985)

head(CPS1985)

# Shorten the labels of the second and sixth occupation levels.
levels(CPS1985$occupation)[c(2, 6)] <- c("techn", "mgmt")

# The columns are accessed through with() or a `data` argument instead of
# attach(), so the data frame is never left on the search path. Bare column
# names are kept inside with() so that titles and labels derived from the
# expressions are the plain column names.

# Location and spread of the wage variable.
with(CPS1985, summary(wage))

with(CPS1985, mean(wage))

with(CPS1985, median(wage))

with(CPS1985, var(wage))

with(CPS1985, sd(wage))


# Histograms on the density scale, for wage and for log wage.
with(CPS1985, hist(wage, freq = FALSE))

with(CPS1985, hist(log(wage), freq = FALSE))

# Kernel density estimate of log wage added to the current histogram.
with(CPS1985, lines(density(log(wage)), col = 4))

# A categorical variable: counts per level, relative frequencies and two
# graphical displays of the counts.
with(CPS1985, summary(occupation))

tab <- with(CPS1985, table(occupation))

prop.table(tab)

barplot(tab)

pie(tab)

# Two categorical variables: the same cross-tabulation through the formula
# interface and through table(), followed by a plot of gender by occupation.
xtabs(~ gender + occupation, data = CPS1985)

with(CPS1985, table(gender, occupation))

plot(gender ~ occupation, data = CPS1985)


# Two numerical variables: default and Spearman correlation between log wage
# and education, and the corresponding scatter plot.
with(CPS1985, cor(log(wage), education))

with(CPS1985, cor(log(wage), education, method = "spearman"))

plot(log(wage) ~ education, data = CPS1985)