Este script tem como objetivo realizar a importação dos dados brutos e aplicar os primeiros tratamentos necessários, como seleção de colunas relevantes, ajustes em nomes de variáveis, padronização de formatos e tipos de dados, além da criação de variáveis auxiliares que serão utilizadas nas análises subsequentes.
library(tidyverse)
library(geobr)
library(ggpubr)
library(sf)
# source("../R/functions.R")
theme_set(theme_bw())
# Import the raw data set and apply the first cleaning steps: keep the
# surface layer only, standardise column names and types, drop unused
# columns and correct implausible values.
data_set <- read_rds("../data/data-set-fco2.rds") |>
  # Keep only the first depth layer (0-10 cm).
  filter(prof == "0-0.1") |>
  mutate(
    manejo = as_factor(manejo),
    tratamento = as_factor(tratamento)
  ) |>
  rename(
    # Keep a single pair of coordinates, renamed to mark them as the
    # municipality coordinates.
    longitude_muni = long,
    latitude_muni = lat,
    xco2 = xco2_detrend_5,
    xco2_trend = xco2_5,
    sif = sif_5,
    ph = p_h
  ) |>
  # Drop the variables that are not used downstream.
  select(
    -c(
      prof, id, dist,
      estado, municipio, xco2_1, sif_1, xco2_detrend_1,
      data_preparo, conversao, cobertura, revolvimento_solo
    )
  ) |>
  relocate(
    data, year, month, cultura, x, y, longitude_muni, latitude_muni
  ) |>
  # Medians below are computed within each sampling date.
  group_by(data) |>
  # Stage 1: replace out-of-range readings by the median of the same date.
  # The two fco2 rules run in sequence, so the second median is taken on
  # the already corrected column.
  mutate(
    fco2 = ifelse(fco2 <= 0, median(fco2, na.rm = TRUE), fco2),
    fco2 = ifelse(fco2 > 20, median(fco2, na.rm = TRUE), fco2),
    ts = ifelse(ts > 40, median(ts, na.rm = TRUE), ts),
    macro = ifelse(macro <= 0, median(macro, na.rm = TRUE), macro),
    vtp = ifelse(vtp <= 0, median(vtp, na.rm = TRUE), vtp),
    pla = ifelse(pla <= 0, median(pla, na.rm = TRUE), pla),
    sb = ifelse(sb >= 150, median(sb, na.rm = TRUE), sb),
    mg = ifelse(mg >= 50, median(mg, na.rm = TRUE), mg),
    ca = ifelse(ca >= 80, median(ca, na.rm = TRUE), ca),
    p = ifelse(p >= 160, median(p, na.rm = TRUE), p)
  ) |>
  # Stage 2: fix individual values and rescale records that appear to be
  # on a different scale from the rest of the column.
  mutate(
    ph = ifelse(ph == 52, 5.2, ph),
    k = ifelse(k == 34, 0.34, k),
    at = ifelse(at > 1000, at / 10, at),
    arg = ifelse(arg > 1000, arg / 10, arg),
    macro = ifelse(macro < 1, macro * 100, macro),
    micro = ifelse(micro < 1, micro * 100, micro),
    vtp = ifelse(vtp < 1, vtp * 100, vtp)
  ) |>
  # Stage 3: silt is the remainder of the corrected sand (at) and clay
  # (arg) fractions out of 1000; all three are blanked for 2014.
  mutate(
    silte = 1000 - arg - at,
    silte = ifelse(year == 2014, NA, silte),
    arg = ifelse(year == 2014, NA, arg),
    at = ifelse(year == 2014, NA, at)
  ) |>
  ungroup()
# Per-column summary (missingness, quantiles, inline histograms).
data_set |> skimr::skim()
Name | data_set |
Number of rows | 14977 |
Number of columns | 51 |
_______________________ | |
Column type frequency: | |
character | 2 |
factor | 2 |
numeric | 46 |
POSIXct | 1 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
cultura | 0 | 1 | 4 | 14 | 0 | 11 | 0 |
experimento | 0 | 1 | 8 | 8 | 0 | 2 | 0 |
Variable type: factor
skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
---|---|---|---|---|---|
manejo | 0 | 1 | FALSE | 10 | ref: 6072, can: 3984, con: 1484, pla: 840 |
tratamento | 0 | 1 | FALSE | 21 | CC: 3984, SI: 2492, EU: 2320, PD: 840 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
year | 0 | 1.00 | 2012.66 | 5.35 | 2001.00 | 2008.00 | 2015.00 | 2017.00 | 2019.00 | ▂▂▁▆▇ |
month | 0 | 1.00 | 7.25 | 2.79 | 1.00 | 6.00 | 7.00 | 10.00 | 12.00 | ▂▂▇▃▅ |
x | 0 | 1.00 | 1408852.86 | 2962685.35 | 0.00 | 0.00 | 27.00 | 80.00 | 7749472.16 | ▇▁▁▁▂ |
y | 0 | 1.00 | 295780.57 | 1258074.22 | 0.00 | 0.00 | 25.50 | 80.00 | 7630525.47 | ▇▁▁▁▁ |
longitude_muni | 0 | 1.00 | -50.39 | 1.76 | -51.84 | -51.84 | -51.84 | -48.20 | -48.08 | ▇▁▁▁▅ |
latitude_muni | 0 | 1.00 | -20.67 | 0.54 | -21.40 | -21.35 | -20.25 | -20.25 | -20.00 | ▅▁▁▁▇ |
fco2 | 110 | 0.99 | 2.76 | 2.01 | 0.02 | 1.29 | 2.14 | 3.75 | 18.40 | ▇▂▁▁▁ |
ts | 317 | 0.98 | 21.77 | 5.90 | 1.00 | 19.40 | 22.60 | 26.20 | 39.70 | ▁▁▇▆▁ |
us | 1754 | 0.88 | 16.12 | 8.80 | 0.00 | 10.00 | 14.00 | 22.00 | 89.00 | ▇▅▁▁▁ |
ph | 2382 | 0.84 | 4.62 | 0.61 | 3.50 | 4.00 | 4.50 | 5.15 | 6.50 | ▇▆▆▃▁ |
mo | 1355 | 0.91 | 22.18 | 12.32 | 1.49 | 15.00 | 23.00 | 29.00 | 61.26 | ▅▇▇▂▁ |
p | 1355 | 0.91 | 19.38 | 20.21 | 1.00 | 6.00 | 15.00 | 27.00 | 151.00 | ▇▁▁▁▁ |
k | 1348 | 0.91 | 2.46 | 2.15 | 0.04 | 1.00 | 1.70 | 3.45 | 12.50 | ▇▂▂▁▁ |
ca | 1376 | 0.91 | 17.56 | 14.41 | 1.19 | 6.00 | 11.35 | 26.00 | 75.00 | ▇▃▂▁▁ |
mg | 1376 | 0.91 | 10.37 | 5.25 | 0.42 | 7.00 | 10.00 | 13.20 | 34.00 | ▅▇▃▁▁ |
h_al | 1362 | 0.91 | 48.24 | 28.80 | 0.00 | 28.00 | 42.29 | 72.00 | 121.00 | ▅▇▆▂▂ |
sb | 1376 | 0.91 | 30.36 | 19.52 | 1.75 | 16.30 | 24.60 | 42.41 | 100.68 | ▇▆▃▂▁ |
ctc | 1369 | 0.91 | 79.25 | 31.11 | 5.18 | 61.60 | 85.70 | 103.60 | 173.30 | ▂▃▇▃▁ |
v | 1383 | 0.91 | 41.21 | 20.07 | 4.96 | 21.00 | 43.00 | 57.60 | 100.00 | ▇▆▇▅▁ |
ds | 3284 | 0.78 | 1.39 | 0.17 | 0.88 | 1.24 | 1.38 | 1.52 | 1.86 | ▁▇▇▇▁ |
macro | 3277 | 0.78 | 11.65 | 7.04 | 0.56 | 6.99 | 10.54 | 15.20 | 89.00 | ▇▂▁▁▁ |
micro | 3298 | 0.78 | 34.54 | 6.75 | 7.00 | 31.50 | 35.63 | 39.12 | 52.42 | ▁▁▅▇▁ |
vtp | 3298 | 0.78 | 46.01 | 7.27 | 15.00 | 40.81 | 46.25 | 51.34 | 87.80 | ▁▆▇▁▁ |
pla | 3438 | 0.77 | 29.84 | 11.49 | 0.10 | 21.94 | 32.60 | 38.33 | 79.80 | ▂▅▇▁▁ |
at | 8275 | 0.45 | 495.68 | 223.25 | 132.50 | 355.93 | 446.05 | 657.84 | 872.75 | ▇▇▅▇▆ |
silte | 8282 | 0.45 | 100.64 | 69.49 | 1.25 | 50.86 | 77.87 | 138.20 | 305.00 | ▇▇▃▁▂ |
arg | 8247 | 0.45 | 404.21 | 166.33 | 69.21 | 302.96 | 449.55 | 519.62 | 689.01 | ▅▂▅▇▅ |
hlifs | 10452 | 0.30 | 14590.11 | 17253.55 | 158.39 | 1110.15 | 2409.80 | 29707.78 | 84692.90 | ▇▃▁▁▁ |
xco2_trend | 7483 | 0.50 | 400.35 | 4.23 | 394.34 | 395.81 | 399.91 | 403.92 | 409.01 | ▇▅▅▆▂ |
xco2 | 7483 | 0.50 | 386.12 | 1.54 | 383.57 | 385.36 | 385.88 | 387.17 | 388.82 | ▇▇▇▇▇ |
sif | 7483 | 0.50 | 0.67 | 0.32 | 0.29 | 0.38 | 0.57 | 0.86 | 1.42 | ▇▇▁▂▂ |
tmed | 4103 | 0.73 | 23.79 | 4.35 | 12.60 | 21.60 | 24.50 | 27.20 | 31.50 | ▂▂▇▇▅ |
tmax | 7963 | 0.47 | 32.13 | 4.32 | 20.70 | 30.02 | 32.60 | 35.10 | 40.40 | ▂▂▇▇▃ |
tmin | 7963 | 0.47 | 19.04 | 4.01 | 5.10 | 16.10 | 19.80 | 22.40 | 25.90 | ▁▁▇▆▇ |
umed | 4103 | 0.73 | 68.48 | 12.57 | 34.60 | 59.60 | 68.20 | 78.50 | 94.70 | ▂▅▇▇▃ |
umax | 7963 | 0.47 | 93.56 | 6.71 | 71.60 | 90.50 | 95.70 | 99.40 | 100.00 | ▁▁▂▃▇ |
umin | 7963 | 0.47 | 46.28 | 12.26 | 17.90 | 37.30 | 46.30 | 54.00 | 87.80 | ▂▇▇▂▁ |
pk_pa | 4103 | 0.73 | 96.48 | 1.47 | 94.01 | 94.70 | 97.30 | 97.60 | 98.40 | ▆▂▁▇▆ |
rad | 4422 | 0.70 | 16.14 | 4.85 | 3.50 | 12.70 | 15.80 | 20.50 | 27.90 | ▁▆▇▆▂ |
par | 6891 | 0.54 | 284.33 | 142.31 | 8.80 | 233.70 | 294.40 | 388.60 | 515.50 | ▃▁▇▃▃ |
eto | 4271 | 0.71 | 4.50 | 2.12 | 0.92 | 2.90 | 4.20 | 5.50 | 12.10 | ▇▇▃▂▁ |
velmax | 7963 | 0.47 | 5.56 | 1.93 | 3.10 | 4.50 | 5.10 | 6.20 | 16.60 | ▇▃▁▁▁ |
velmin | 7963 | 0.47 | 1.19 | 0.49 | 0.20 | 0.80 | 1.10 | 1.40 | 2.60 | ▂▇▅▂▁ |
dir_vel | 7963 | 0.47 | 138.05 | 83.73 | 27.50 | 80.80 | 101.20 | 213.80 | 358.30 | ▇▇▁▃▂ |
chuva | 4103 | 0.73 | 1.25 | 4.32 | 0.00 | 0.00 | 0.00 | 0.00 | 36.00 | ▇▁▁▁▁ |
inso | 4103 | 0.73 | 6.57 | 2.80 | 0.00 | 4.80 | 6.80 | 8.70 | 11.10 | ▂▃▆▇▆ |
Variable type: POSIXct
skim_variable | n_missing | complete_rate | min | max | median | n_unique |
---|---|---|---|---|---|---|
data | 0 | 1 | 2001-07-10 | 2019-12-01 | 2015-10-06 | 205 |
# Map of missing values across the whole data set.
data_set |> visdat::vis_miss()

# Scatterplot matrix of fco2, log(fco2) and the columns selected through
# `...` (any tidyselect expression accepted by `select()`).
plot_fco2_pairs <- function(...) {
  data_set |>
    mutate(fco2_log = log(fco2)) |>
    select(fco2, fco2_log, ...) |>
    GGally::ggpairs()
}

# One matrix per block of consecutive columns of data_set.
plot_fco2_pairs(ts, us)
plot_fco2_pairs(ph:h_al)
plot_fco2_pairs(sb:pla)
plot_fco2_pairs(at:hlifs)
plot_fco2_pairs(xco2_trend:umin)
plot_fco2_pairs(pk_pa:inso)
# Names of every column from fco2 through inso, in data_set order.
my_vars <- data_set |>
  select(fco2:inso) |>
  names()

# Draw one histogram per variable, using the Freedman-Diaconis rule
# (nclass.FD) on the non-missing values to choose the number of bins.
#
# walk() is used instead of map() because the plots are produced purely
# for their side effect: print() returns the plot invisibly, so map()
# collected all the ggplot objects and the top-level call auto-printed
# that list, echoing "[[1]]", "[[2]]", ... and drawing every histogram a
# second time.
walk(my_vars, \(var_name) {
  # Non-missing observations of the current variable only.
  var_data <- data_set |>
    select(all_of(var_name)) |>
    drop_na()

  # Injecting a symbol (rather than indexing .data with var_name) keeps
  # the column name as the x-axis label.
  var_sym <- sym(var_name)

  hist_plot <- ggplot(var_data, aes(x = !!var_sym)) +
    geom_histogram(
      boundary = 0,
      color = "black",
      fill = "gray",
      bins = nclass.FD(var_data[[var_name]])
    )

  print(hist_plot)
})
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
##
## [[11]]
##
## [[12]]
##
## [[13]]
##
## [[14]]
##
## [[15]]
##
## [[16]]
##
## [[17]]
##
## [[18]]
##
## [[19]]
##
## [[20]]
##
## [[21]]
##
## [[22]]
##
## [[23]]
##
## [[24]]
##
## [[25]]
##
## [[26]]
##
## [[27]]
##
## [[28]]
##
## [[29]]
##
## [[30]]
##
## [[31]]
##
## [[32]]
##
## [[33]]
##
## [[34]]
##
## [[35]]
##
## [[36]]
##
## [[37]]
##
## [[38]]
##
## [[39]]
##
## [[40]]
# Mean hlifs per year and crop, using only rows where all three columns
# are present. The result stays grouped by year (summarise() drops only
# the last grouping level).
data_set |>
  drop_na(cultura, year, hlifs) |>
  group_by(year, cultura) |>
  summarise(hlifs = mean(hlifs))
## # A tibble: 10 × 3
## # Groups: year [5]
## year cultura hlifs
## <dbl> <chr> <dbl>
## 1 2007 cana-de-acucar 205.
## 2 2008 cana-de-acucar 2215.
## 3 2017 cerrado 28019.
## 4 2017 eucalipto 38916.
## 5 2017 pinus 49523.
## 6 2017 silvipastoril 29867.
## 7 2018 pasto 2199.
## 8 2018 silvipastoril 8353.
## 9 2019 pasto 2482.
## 10 2019 silvipastoril 29675.