# P1: DataFrame ----
# Build a small student data frame, inspect it, append one record and
# project two of its columns.

# IDs 1001..1010 as one integer range (replaces the element-by-element
# append loop, which also carried pasted console "+" prompts).
Student_ID <- 1001:1010
Student_ID
Name <- c("jim", "jacy", "ben", "lexi", "john", "suzan", "lee", "emma", "drax", "alice")
Name
Marks <- c(23, 34, 54, 35, 65, 34, 76, 45, 87, 88)
Marks
Gender <- c("male", "female", "male", "female", "male", "female", "male", "female", "male", "female")
Gender

# stringsAsFactors only has an effect when passed to data.frame(); it keeps
# Name and Gender as character columns on every R version.
df <- data.frame(Student_ID, Name, Marks, Gender, stringsAsFactors = FALSE)
df
str(df)
summary(df)

# A list keeps each field's own type. A c() vector would coerce every field
# to character and silently turn Student_ID and Marks into text columns.
extra <- list(1011L, "jazz", 44, "male")
df[nrow(df) + 1, ] <- extra
df

# Two-column projection: student IDs and marks only
data.frame(df$Student_ID, df$Marks)
# ------------------------------------------------------------------------------
# ------------------------------------------------------------------------------
# P2: Import and Export ----
# Write the student data frame to CSV and whitespace-delimited text, then
# read both files back.

# Install writexl only when it is missing instead of on every run.
if (!requireNamespace("writexl", quietly = TRUE)) {
  install.packages("writexl")
}
library("writexl")

Student_ID <- 1001:1010
Name <- c("jim", "jacy", "ben", "lexi", "john", "suzan", "lee", "emma", "drax", "alice")
Marks <- c(23, 34, 54, 35, 65, 34, 76, 45, 87, 88)
Gender <- c("male", "female", "male", "female", "male", "female", "male", "female", "male", "female")

# stringsAsFactors must be an argument of data.frame() to have any effect.
df <- data.frame(Student_ID, Name, Marks, Gender, stringsAsFactors = FALSE)

# row.names = FALSE keeps the automatic 1..n row index out of the files, so
# reading them back yields the same four columns rather than an extra
# index column.
write.csv(df, file = "D:/University/Sem 4/MFDA/PR/pr2_try1.csv", row.names = FALSE)
write.table(df, file = "D:/University/Sem 4/MFDA/PR/pr2_try2.txt", row.names = FALSE)

df_read1 <- read.csv("D:/University/Sem 4/MFDA/PR/pr2_try1.csv")
# header = TRUE is required now that the header row has as many fields as
# the data rows.
df_read2 <- read.table(file = "D:/University/Sem 4/MFDA/PR/pr2_try2.txt", header = TRUE)
# ------------------------------------------------------------------------------
# ------------------------------------------------------------------------------
# P3: Central Tendencies ----
# Central tendency and dispersion of selected mtcars columns.
mean(mtcars$cyl)
mean(mtcars$mpg)
mean(mtcars$gear)
mean(mtcars$disp)

# Statistical mode: the most frequent value of x.
#
# x: an atomic vector or factor (numeric, character, logical, ...).
# Returns the most frequent non-NA value; ties resolve to the smallest value.
# An input with no non-NA values yields a zero-length result.
#
# Counting goes through match() against the sorted distinct values, so it is
# not limited to positive whole numbers (tabulate(x) alone truncates
# fractions and drops zero and negative values).
# NOTE: this definition masks base::mode() in the global environment.
mode <- function(x) {
  values <- sort(unique(x))
  if (length(values) == 0L) {
    return(values)
  }
  values[which.max(tabulate(match(x, values)))]
}
mode(mtcars$cyl)
median(mtcars$cyl)
var(mtcars$cyl)
sd(mtcars$cyl)
# Box plot of mileage by cylinder count, drawn on the current device.
boxplot(mpg ~ cyl, data = mtcars, xlab = "Number of Cylinders", ylab = "Miles Per Gallon", main = "Mileage Data")

# The target files are named *.jpg, so open a JPEG device. A png() device
# would store PNG data under a .jpg name.
jpeg(filename = "C:/Users/HP/Documents/MFDATrash/line_chart1.jpg")
plot(mtcars$cyl, type = "o", xlab = "Index", ylab = "Cylinders", main = "Cylinders in mtcars DataSet")
dev.off() # close the device so the file is flushed to disk

jpeg(filename = "C:/Users/HP/Documents/MFDATrash/dotplot1.jpg")
dotchart(mtcars$cyl, labels = rownames(mtcars))
dev.off()
# Histograms, bar charts, a pie chart and a scatter plot for mtcars.
hist(mtcars$mpg, xlab = "Miles Per Gallon", ylab = "Number of Cars", main = "Cars Distribution")
hist(mtcars$cyl, xlab = "Cylinders", main = "Histogram for Cylinder in mtcars")

# One bar per car (raw cyl values) ...
barplot(mtcars$cyl, xlab = "Cars", ylab = "Cylinders", main = "Barplot: Cars and Number of cyl")

# ... versus one bar per cylinder class (counts from table()).
barplot(
  table(mtcars$cyl),
  main = "Car Distribution",
  xlab = "Number of Cylinders",
  col = c("darkblue", "green", "red"),
  names.arg = c("4 Cylinder", "6 Cylinder", "8 Cylinder")
)
pie(table(mtcars$cyl), labels = c("4 Cylinder", "6 Cylinder", "8 Cylinder"), main = "Car Distribution")

# The data set is mtcars; no object called `input` is defined in this script.
plot(x = mtcars$cyl, y = mtcars$mpg, xlab = "Number of Cylinders", ylab = "Miles Per Gallon", main = "Scatterplot: Cylinders vs MPG")
# ------------------------------------------------------------------------------
# ------------------------------------------------------------------------------
# P4: Advanced Visualisation ----
# Spread and order statistics for mtcars, then mileage by cylinder count.

# Range width (max - min) of every column, as a named numeric vector
vapply(
  mtcars,
  function(column) diff(range(column, na.rm = TRUE)),
  numeric(1)
)

# Ordering permutation and sorted values of mpg
order(mtcars$mpg)
sort(mtcars$mpg)

# Interquartile range of mpg; five-number and upper quantiles of weight
IQR(mtcars$mpg)
quantile(mtcars$wt, probs = seq(0, 1, by = 0.25))
quantile(mtcars$wt, probs = c(0.75, 0.8))

boxplot(
  mpg ~ cyl,
  data = mtcars,
  main = "Mileage Data",
  xlab = "Number of Cylinders",
  ylab = "Miles Per Gallon"
)
# a) Cars with 8 cylinders have the lowest mileage (miles per gallon).
# b) The 6-cylinder group has the narrowest spread in the box plot, so a
#    prediction for 6 cylinders carries the most confidence.
# ------------------------------------------------------------------------------
# ------------------------------------------------------------------------------
# P5: Probability Distributions ----
# Binomial(n = 5) probability mass functions for three success probabilities.
x <- 0:5

# p = 0.4, fully labelled
barplot(
  height = dbinom(x, 5, 0.4),
  names.arg = c(0, 1, 2, 3, 4, 5),
  main = "Problem 1",
  xlab = "Number of Late Arrivals",
  ylab = "Probability"
)

# p = 0.1 and p = 0.3 over the same support, for comparing skewness
barplot(dbinom(x, 5, 0.1))
barplot(dbinom(x, 5, 0.3))
# When the success probability is low (e.g. 0.1) the distribution is
# right-skewed. As the probability increases the skew shifts to the left;
# at 0.9 the distribution is left-skewed.
# Binomial(n = 20, p = 0.4) probability mass function.
# The support of a binomial starts at 0, so include it: the bars then cover
# the whole distribution and line up with the 0-based plots for n = 5.
x <- 0:20
barplot(dbinom(x, 20, 0.4))
# Keeping p = 0.4 and increasing n, the distribution approaches a normal
# distribution.
# Poisson probabilities and plots.

# P(X = 0), ..., P(X = 4) for rate 5
x <- 0:4
dpois(x, 5)

# P(X <= 7) and P(X > 7) for rate 5
ppois(7, 5, lower.tail = TRUE)
ppois(7, 5, lower.tail = FALSE)

# P(5 <= X <= 8) for rate 5
ppois(8, 5) - ppois(4, 5)

# Poisson(3) mass function, starting at 0 so P(X = 0) is not dropped.
# The "/" in the notes meant "or": these are two alternative plots, drawn as
# separate statements rather than divided by one another.
x <- 0:50
plot(x, dpois(x, 3), type = "o")
barplot(dpois(x, 3))
# ------------------------------------------------------------------------------
# ------------------------------------------------------------------------------
# P6: Case Study of Probability Distributions ----
# Normal-distribution case study: one density plot and several probabilities.

# Density of N(mean = 25, sd = 10) evaluated on the grid 0, 1, ..., 50
x <- seq(from = 0, to = 50, by = 1)
y <- dnorm(x, mean = 25.0, sd = 10)
plot(x, y, col = "blue", main = "Normal Distribution")

# Probabilities for a normal variable with mean 22 and sd 29
# P(16.2 < X < 27.5)
pnorm(27.5, mean = 22, sd = 29) - pnorm(16.2, mean = 22, sd = 29)

# P(X < 17), first with a label and then as a bare value
cat("The probability is less than 17: ", pnorm(17, mean = 22, sd = 29))
pnorm(17, mean = 22, sd = 29)

# P(X < 15) + P(X > 25)
pnorm(15, mean = 22, sd = 29) + (1 - pnorm(25, mean = 22, sd = 29))

# P(X < 850000) for mean 1000000 and sd 200000
pnorm(850000, mean = 1000000, sd = 200000)
# ------------------------------------------------------------------------------
# ------------------------------------------------------------------------------
# P7: Algebraic Operations and Distance Metrics ----
# Dot and cross products with pracma, plus a matrix transpose.

# Install pracma only when it is missing, then attach it. Installing alone
# does not put dot() and cross() on the search path.
if (!requireNamespace("pracma", quietly = TRUE)) {
  install.packages("pracma")
}
library(pracma)

a <- c(1, 2, 3)
b <- c(5, 6, 7)
dot(a, b)
cross(a, b)

# The 3-element input is recycled row-wise, so every row of d is 1 2 3 and
# every row of e is 4 5 6.
d <- matrix(c(1, 2, 3), nrow = 3, ncol = 3, byrow = TRUE)
e <- matrix(c(4, 5, 6), nrow = 3, ncol = 3, byrow = TRUE)
cross(d, e)

# A 1 x 3 row vector and its 3 x 1 transpose
f <- matrix(c(1, 2, 3), nrow = 1, ncol = 3, byrow = TRUE)
f
t(f)
# Distance metrics between two 3-element vectors.

# Install the packages only when they are missing, then attach them.
for (pkg in c("geometry", "philentropy")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library(geometry)
library(philentropy)

x <- c(50, 5, 2)
y <- c(1, 8, 9)

# distance() compares the rows of a matrix, so stack x and y as two rows.
p <- rbind(x, y)
distance(p, method = "euclidean")
distance(p, method = "manhattan")
distance(p, method = "jaccard")

# The label is a single line; the pasted source had a line break inside the
# string literal.
print("Hamming Distance")
# Hamming distance: number of positions at which x and y differ
sum(x != y)
# Determinant of a 3 x 3 matrix, entered row by row.
d <- matrix(
  c(
    0.68567, 0.12975, -0.71626,
    0.14807, 0.93855, 0.31176,
    0.71269, -0.31982, 0.62433
  ),
  nrow = 3,
  ncol = 3,
  byrow = TRUE
)
det(d)
# Eigen decomposition of a symmetric 3 x 3 matrix and identities tying the
# eigenvalues to the matrix.

# Install matlib only when it is missing, then attach it.
if (!requireNamespace("matlib", quietly = TRUE)) {
  install.packages("matlib")
}
library(matlib)

e <- matrix(
  c(13, -4, 2, -4, 11, -2, 2, -2, 8),
  nrow = 3, ncol = 3, byrow = TRUE
)
ev <- eigen(e)
# The outer parentheses print each value as it is assigned.
(values <- ev$values)
(vectors <- ev$vectors)

# Sum of squared entries next to sum of squared eigenvalues
sum(e^2)
sum(values^2)

# Determinant next to product of eigenvalues
det(e)
prod(values)

# R() comes from matlib; presumably the matrix rank (confirm against the
# matlib documentation). It is shown next to the count of non-zero
# eigenvalues.
R(e)
# Eigenvalues are floating-point results: count those clearly different
# from zero rather than testing exact inequality with 0.
sum(abs(values) > sqrt(.Machine$double.eps))
# ------------------------------------------------------------------------------
# P8: Sampling Distribution Techniques ----
# Simulate the sampling distribution of the mean for samples of size 20
# drawn from a normal population with mean 5.3 and sd 9.

# define number of samples
n <- 10000

# One sample mean per simulated sample. vapply() allocates the result once
# and guarantees a numeric vector of length n.
sample_means <- vapply(
  seq_len(n),
  function(i) mean(rnorm(20, mean = 5.3, sd = 9)),
  numeric(1)
)

# view first six sample means
# Example output from one run (values change per run, no seed is set):
#   4.304143 6.058354 7.803126 4.667816 4.379790 8.193655
head(sample_means)

# create histogram to visualize the sampling distribution
hist(sample_means, main = "", xlab = "Sample Means", col = "steelblue")

# mean of sampling distribution
mean(sample_means)

# standard deviation of sampling distribution
sd(sample_means)

# calculate probability that sample mean is less than or equal to 6
sum(sample_means <= 6) / length(sample_means)
# ------------------------------------------------------------------------------
# ------------------------------------------------------------------------------
# P9: Estimated Value ----
# Point estimate and 95% confidence interval for mean student height, from
# the `survey` data set in MASS.

# Plain ASCII quotes are required (typographic quotes are a syntax error).
# Install only when the package is missing.
if (!requireNamespace("MASS", quietly = TRUE)) {
  install.packages("MASS")
}
library(MASS)

height.survey <- survey$Height
# The argument is na.rm; a misspelt name is swallowed by `...`, leaving the
# missing values in and the mean NA.
mean(height.survey, na.rm = TRUE)

# Drop missing heights before computing the interval
height.response <- na.omit(survey$Height)
n <- length(height.response)
s <- sd(height.response)

# Standard error of the mean
SE <- s / sqrt(n)
SE

# Margin of error from the t distribution with n - 1 degrees of freedom
# (0.975 quantile, i.e. a two-sided 95% interval)
E <- qt(0.975, df = n - 1) * SE
xbar <- mean(height.response)
xbar + c(-E, E)
# ------------------------------------------------------------------------------
# ------------------------------------------------------------------------------
# P10: Hypothesis Testing ----
# Lower-tail z test: sample mean 9900 against hypothesised mean 10000, with
# population sd 120 and sample size 30.
xbar <- 9900
mu0 <- 10000
sigma <- 120
n <- 30

# Test statistic
z <- (xbar - mu0) / (sigma / sqrt(n))
z

# Critical value at the 5% significance level
alpha <- 0.05
z.alpha <- qnorm(1 - alpha)
-z.alpha
# Conclusion: z is below -z.alpha, so the null hypothesis is rejected.
# Upper-tail z test: sample mean 2.1 against hypothesised mean 2, with
# population sd 0.25 and sample size 35.
xbar <- 2.1
mu0 <- 2
sigma <- 0.25
n <- 35

# Test statistic
z <- (xbar - mu0) / (sigma / sqrt(n))
z

# Critical value at the 5% significance level
alpha <- 0.05
z.alpha <- qnorm(1 - alpha)
z.alpha
# Conclusion: z exceeds z.alpha, so the null hypothesis is rejected.
# (Web-page residue from the blog this script was copied from; not part of
# the practical.)
# 0 Comments: Post a Comment
# Subscribe to Post Comments [Atom]
# << Home