Skip to main content
R

R #

I have not used R for a long time. Proceed at your own risk.

Moved from: (Private repo) loikein/my-jupyter-notes

(Maybe) Useful resources #

Configuration #

Set language on macOS: (credit)

# (In .zshrc)

export LANG=en_US.UTF-8

Check all environment variables and the current locale:

# List every environment variable of the running R process
Sys.getenv()

# Show the locale settings currently in effect
Sys.getlocale()

Check the location of RHOME:

R RHOME

Download and manage packages #

Avoid (or force) installing from source #

# set CRAN
# https://stackoverflow.com/a/30490863/10668706
# graphics=FALSE avoids the graphical chooser; ind selects the mirror by its
# position in the mirror list. NOTE(review): confirm that entry 81 is still the
# intended mirror, since the position depends on the current list.
chooseCRANmirror(graphics=FALSE, ind=81)

# do not build from source
install.packages("ggplot2", type="binary")

# build from source
install.packages("ggplot2", type="source")

List of common packages #

Install:

# One install.packages() call per package. hflights and quantmod are included
# because the "Load" list attaches them with library(), which fails for a
# package that has not been installed.

## plot
install.packages("gapminder")
install.packages("dplyr")
install.packages("ggplot2")

## import
install.packages("readr")
install.packages("data.table")
install.packages("readxl")
install.packages("gdata")
install.packages("DBI")
install.packages("httr")
install.packages("jsonlite")
install.packages("haven")
install.packages("foreign")

## data
install.packages("hflights")

## clean
install.packages("tidyr")
install.packages("lubridate")
install.packages("stringr")

## function
install.packages("purrr")

## trading models
install.packages("quantmod")

Load:

## plot
library(gapminder)
library(ggplot2)

## import
library(readr)
library(data.table)
library(readxl)
library(gdata)
library(DBI)
library(httr)
library(jsonlite)
library(haven)
library(foreign)

## data
library(dplyr)
library(hflights)

## clean
library(tidyr)
library(lubridate)
library(stringr)

## function
library(purrr)

## trading models
library(quantmod)

Package: base #

c() #

Append: (credit)

# c() with no arguments returns NULL, which acts as an empty vector here.
vector <- c()
values <- c("a", "b", "c", "d", "e", "f", "g")

# c() concatenates whole vectors, so all values can be appended in one call.
# Appending inside a loop (vector <- c(vector, v)) gives the same result but
# copies the vector on every iteration.
vector <- c(vector, values)

Package: distr #

Make distribution: (credit)

# Norm() is provided by the distr package, so attach it first.
library(distr)

# Named mu rather than mean so the variable is not confused with base::mean().
mu <- 1
sigma <- 0.25

# The outer parentheses print the object right after assigning it.
(n <- Norm(mean = mu, sd = sigma))

# Distribution Object of Class: Norm
#  mean: 1
#  sd: 0.25

Data.frame #

Import #

Doc: read.table function - RDocumentation

# read.csv() is read.table() preset for comma-separated files: header = TRUE,
# sep = ",", only the double quote as quote character, and no comment
# character. Apostrophes and "#" inside fields are therefore read as data
# instead of opening a quoted string or truncating the line.
mydata <- read.csv("some_data.csv")
# Show the first five rows.
head(mydata, 5)

Export #

Doc: write.table function - RDocumentation

# Row names are written as an extra first column by default;
# add row.names = FALSE to leave them out.
write.csv(mydata, "mydata.csv")

Inspect #

Determine type of columns: (ref)

# Class of each column
sapply(test_df, class)

# Storage type of each column
sapply(test_df, typeof) # numeric -> double

View rows: (ref)

# Rows 30 through 40, all columns
test_df[30:40,]

Drop columns #

Ref: r - Drop data frame columns by name - Stack Overflow

# Names of the columns to remove; list as many as needed.
drops <- c("drop_1", "drop_2")
# Keep every column whose name is not in `drops`. drop = FALSE keeps the
# result a data frame even when only one column is left.
test_df <- test_df[, !(names(test_df) %in% drops), drop = FALSE]

Commands to adjust data types #

# Encode a string column as numeric codes via its factor levels
# https://stackoverflow.com/a/24119941/10668706
test_df_2 <- transform(test_df, id = as.numeric(factor(ID)))

# Expand a categorical column into dummy columns
# (rarely needed)
test_df_2 <- fastDummies::dummy_cols(test_df, select_columns = "Rating")

# Natural logarithm of a column
test_df_2$number_log <- log(test_df$number)

# Overwrite every Score equal to 1 with NA
test_df_2$Score <- replace(test_df_2$Score, test_df_2$Score == 1, NA)

Binder #

I have stopped using Binder. Proceed at your own risk.

Refs:

runtime.txt:

r-2018-11-16

requirements.txt:

jupyter_contrib_nbextensions

postBuild:

jupyter contrib nbextension install --user
jupyter nbextension enable toc2
jupyter trust binder-r-test.ipynb

RStudio #

Do not create HTML document #

Doc: 3.1 HTML document | R Markdown: The Definitive Guide

---
title: "R Notebook"
# output: html_notebook
---

Theme #

Doc: RStudio User Guide - Themes

Themes: