Objetivo

Este script tem como objetivo realizar a importação dos dados brutos e aplicar os primeiros tratamentos necessários, como seleção de colunas relevantes, ajustes em nomes de variáveis, padronização de formatos e tipos de dados, além da criação de variáveis auxiliares que serão utilizadas nas análises subsequentes.

Carregando Pacotes

library(tidyverse)
library(geobr)
library(ggpubr)
library(sf)
# source("../R/functions.R")
# Make the black-and-white theme the default for every ggplot drawn below.
ggplot2::theme_set(ggplot2::theme_bw())

Importação de Dados

# Read the raw data set, keep a single depth layer, tidy the column names and
# clean implausible readings. The cleaning steps run grouped by sampling date
# (`data`), so every median below is the median of that date.
data_set <- read_rds("../data/data-set-fco2.rds") |>
  # keep only the first depth layer (0-10 cm)
  filter(prof == "0-0.1") |>
  mutate(
    # keep a single pair of coordinates (long/lat are dropped further down)
    longitude_muni = long,
    latitude_muni = lat,
    manejo = as_factor(manejo),
    tratamento = as_factor(tratamento)
  ) |>
  rename(
    xco2 = xco2_detrend_5,
    xco2_trend = xco2_5,
    sif = sif_5,
    ph = p_h
  ) |>
  # remove the variables that are not carried forward
  select(
    -c(
      prof, long, lat, id, dist,
      estado, municipio, xco2_1, sif_1, xco2_detrend_1,
      data_preparo, conversao, cobertura, revolvimento_solo
    )
  ) |>
  relocate(data, year, month, cultura, x, y, longitude_muni, latitude_muni) |>
  group_by(data) |>
  # Stage 1: readings outside the accepted range become the date's median.
  mutate(
    fco2 = ifelse(fco2 <= 0, median(fco2, na.rm = TRUE), fco2),
    fco2 = ifelse(fco2 > 20, median(fco2, na.rm = TRUE), fco2),
    ts = ifelse(ts > 40, median(ts, na.rm = TRUE), ts),
    macro = ifelse(macro <= 0, median(macro, na.rm = TRUE), macro),
    vtp = ifelse(vtp <= 0, median(vtp, na.rm = TRUE), vtp),
    pla = ifelse(pla <= 0, median(pla, na.rm = TRUE), pla),
    sb = ifelse(sb >= 150, median(sb, na.rm = TRUE), sb),
    mg = ifelse(mg >= 50, median(mg, na.rm = TRUE), mg),
    ca = ifelse(ca >= 80, median(ca, na.rm = TRUE), ca),
    p = ifelse(p >= 160, median(p, na.rm = TRUE), p)
  ) |>
  # Stage 2: rescale specific values.
  # NOTE(review): these look like misplaced decimal points -- confirm.
  mutate(
    ph = ifelse(ph == 52, 5.2, ph),
    k = ifelse(k == 34, 0.34, k),
    at = ifelse(at > 1000, at / 10, at),
    arg = ifelse(arg > 1000, arg / 10, arg)
  ) |>
  # Stage 3: silte is the remainder of 1000 after arg and at; all three are
  # set to NA for the year 2014.
  mutate(
    silte = 1000 - arg - at,
    silte = ifelse(year == 2014, NA, silte),
    arg = ifelse(year == 2014, NA, arg),
    at = ifelse(year == 2014, NA, at)
  ) |>
  # Stage 4: values below 1 are multiplied by 100.
  # NOTE(review): presumably fractions recorded instead of percentages.
  mutate(
    macro = ifelse(macro < 1, macro * 100, macro),
    micro = ifelse(micro < 1, micro * 100, micro),
    vtp = ifelse(vtp < 1, vtp * 100, vtp)
  ) |>
  ungroup()

# Per-column summary of the cleaned data set.
data_set |>
  skimr::skim()
Data summary
Name data_set
Number of rows 14977
Number of columns 51
_______________________
Column type frequency:
character 2
factor 2
numeric 46
POSIXct 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
cultura 0 1 4 14 0 11 0
experimento 0 1 8 8 0 2 0

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
manejo 0 1 FALSE 10 ref: 6072, can: 3984, con: 1484, pla: 840
tratamento 0 1 FALSE 21 CC: 3984, SI: 2492, EU: 2320, PD: 840

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
year 0 1.00 2012.66 5.35 2001.00 2008.00 2015.00 2017.00 2019.00 ▂▂▁▆▇
month 0 1.00 7.25 2.79 1.00 6.00 7.00 10.00 12.00 ▂▂▇▃▅
x 0 1.00 1408852.86 2962685.35 0.00 0.00 27.00 80.00 7749472.16 ▇▁▁▁▂
y 0 1.00 295780.57 1258074.22 0.00 0.00 25.50 80.00 7630525.47 ▇▁▁▁▁
longitude_muni 0 1.00 -50.39 1.76 -51.84 -51.84 -51.84 -48.20 -48.08 ▇▁▁▁▅
latitude_muni 0 1.00 -20.67 0.54 -21.40 -21.35 -20.25 -20.25 -20.00 ▅▁▁▁▇
fco2 110 0.99 2.76 2.01 0.02 1.29 2.14 3.75 18.40 ▇▂▁▁▁
ts 317 0.98 21.77 5.90 1.00 19.40 22.60 26.20 39.70 ▁▁▇▆▁
us 1754 0.88 16.12 8.80 0.00 10.00 14.00 22.00 89.00 ▇▅▁▁▁
ph 2382 0.84 4.62 0.61 3.50 4.00 4.50 5.15 6.50 ▇▆▆▃▁
mo 1355 0.91 22.18 12.32 1.49 15.00 23.00 29.00 61.26 ▅▇▇▂▁
p 1355 0.91 19.38 20.21 1.00 6.00 15.00 27.00 151.00 ▇▁▁▁▁
k 1348 0.91 2.46 2.15 0.04 1.00 1.70 3.45 12.50 ▇▂▂▁▁
ca 1376 0.91 17.56 14.41 1.19 6.00 11.35 26.00 75.00 ▇▃▂▁▁
mg 1376 0.91 10.37 5.25 0.42 7.00 10.00 13.20 34.00 ▅▇▃▁▁
h_al 1362 0.91 48.24 28.80 0.00 28.00 42.29 72.00 121.00 ▅▇▆▂▂
sb 1376 0.91 30.36 19.52 1.75 16.30 24.60 42.41 100.68 ▇▆▃▂▁
ctc 1369 0.91 79.25 31.11 5.18 61.60 85.70 103.60 173.30 ▂▃▇▃▁
v 1383 0.91 41.21 20.07 4.96 21.00 43.00 57.60 100.00 ▇▆▇▅▁
ds 3284 0.78 1.39 0.17 0.88 1.24 1.38 1.52 1.86 ▁▇▇▇▁
macro 3277 0.78 11.65 7.04 0.56 6.99 10.54 15.20 89.00 ▇▂▁▁▁
micro 3298 0.78 34.54 6.75 7.00 31.50 35.63 39.12 52.42 ▁▁▅▇▁
vtp 3298 0.78 46.01 7.27 15.00 40.81 46.25 51.34 87.80 ▁▆▇▁▁
pla 3438 0.77 29.84 11.49 0.10 21.94 32.60 38.33 79.80 ▂▅▇▁▁
at 8275 0.45 495.68 223.25 132.50 355.93 446.05 657.84 872.75 ▇▇▅▇▆
silte 8282 0.45 100.64 69.49 1.25 50.86 77.87 138.20 305.00 ▇▇▃▁▂
arg 8247 0.45 404.21 166.33 69.21 302.96 449.55 519.62 689.01 ▅▂▅▇▅
hlifs 10452 0.30 14590.11 17253.55 158.39 1110.15 2409.80 29707.78 84692.90 ▇▃▁▁▁
xco2_trend 7483 0.50 400.35 4.23 394.34 395.81 399.91 403.92 409.01 ▇▅▅▆▂
xco2 7483 0.50 386.12 1.54 383.57 385.36 385.88 387.17 388.82 ▇▇▇▇▇
sif 7483 0.50 0.67 0.32 0.29 0.38 0.57 0.86 1.42 ▇▇▁▂▂
tmed 4103 0.73 23.79 4.35 12.60 21.60 24.50 27.20 31.50 ▂▂▇▇▅
tmax 7963 0.47 32.13 4.32 20.70 30.02 32.60 35.10 40.40 ▂▂▇▇▃
tmin 7963 0.47 19.04 4.01 5.10 16.10 19.80 22.40 25.90 ▁▁▇▆▇
umed 4103 0.73 68.48 12.57 34.60 59.60 68.20 78.50 94.70 ▂▅▇▇▃
umax 7963 0.47 93.56 6.71 71.60 90.50 95.70 99.40 100.00 ▁▁▂▃▇
umin 7963 0.47 46.28 12.26 17.90 37.30 46.30 54.00 87.80 ▂▇▇▂▁
pk_pa 4103 0.73 96.48 1.47 94.01 94.70 97.30 97.60 98.40 ▆▂▁▇▆
rad 4422 0.70 16.14 4.85 3.50 12.70 15.80 20.50 27.90 ▁▆▇▆▂
par 6891 0.54 284.33 142.31 8.80 233.70 294.40 388.60 515.50 ▃▁▇▃▃
eto 4271 0.71 4.50 2.12 0.92 2.90 4.20 5.50 12.10 ▇▇▃▂▁
velmax 7963 0.47 5.56 1.93 3.10 4.50 5.10 6.20 16.60 ▇▃▁▁▁
velmin 7963 0.47 1.19 0.49 0.20 0.80 1.10 1.40 2.60 ▂▇▅▂▁
dir_vel 7963 0.47 138.05 83.73 27.50 80.80 101.20 213.80 358.30 ▇▇▁▃▂
chuva 4103 0.73 1.25 4.32 0.00 0.00 0.00 0.00 36.00 ▇▁▁▁▁
inso 4103 0.73 6.57 2.80 0.00 4.80 6.80 8.70 11.10 ▂▃▆▇▆

Variable type: POSIXct

skim_variable n_missing complete_rate min max median n_unique
data 0 1 2001-07-10 2019-12-01 2015-10-06 205
# Plot the missing values of the cleaned data set.
data_set |>
  visdat::vis_miss()

# Pairwise plots of fco2 and log(fco2) against successive groups of columns.
# Each pipeline adds fco2_log on the fly; data_set itself is not modified.

# fco2 vs. ts and us
data_set |>
  mutate(fco2_log = log(fco2)) |>
  select(fco2, fco2_log, ts, us) |>
  GGally::ggpairs()

# fco2 vs. the columns from ph through h_al
data_set |>
  mutate(fco2_log = log(fco2)) |>
  select(fco2, fco2_log, ph:h_al) |>
  GGally::ggpairs()

# fco2 vs. the columns from sb through pla
data_set |>
  mutate(fco2_log = log(fco2)) |>
  select(fco2, fco2_log, sb:pla) |>
  GGally::ggpairs()

# fco2 vs. the columns from at through hlifs
data_set |>
  mutate(fco2_log = log(fco2)) |>
  select(fco2, fco2_log, at:hlifs) |>
  GGally::ggpairs()

# fco2 vs. the columns from xco2_trend through umin
data_set |>
  mutate(fco2_log = log(fco2)) |>
  select(fco2, fco2_log, xco2_trend:umin) |>
  GGally::ggpairs()

# fco2 vs. the columns from pk_pa through inso
data_set |>
  mutate(fco2_log = log(fco2)) |>
  select(fco2, fco2_log, pk_pa:inso) |>
  GGally::ggpairs()

Histogramas

# Names of every column from fco2 through inso; one histogram is drawn for
# each of them.
my_vars <- data_set |>
  select(fco2:inso) |>
  names()

# walk() instead of map(): the plots are produced only for their side effect
# (print()). map() would also return the list of plots, which is then
# auto-printed at top level as `[[1]]`, `[[2]]`, ... and draws each plot again.
walk(my_vars, \(var_name) {
  var_sym <- sym(var_name)

  # Rows where this variable is missing are left out of the histogram.
  plot_data <- data_set |>
    select(all_of(var_name)) |>
    drop_na()

  plot_x <- plot_data |>
    ggplot(aes(x = !!var_sym)) +
    geom_histogram(
      boundary = 0, color = "black", fill = "gray",
      # number of bins chosen by the Freedman-Diaconis rule on the
      # non-missing values
      bins = nclass.FD(plot_data[[var_name]])
    )
  print(plot_x)
  # print(summary(plot_data[[var_name]]))
})

## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

## 
## [[12]]

## 
## [[13]]

## 
## [[14]]

## 
## [[15]]

## 
## [[16]]

## 
## [[17]]

## 
## [[18]]

## 
## [[19]]

## 
## [[20]]

## 
## [[21]]

## 
## [[22]]

## 
## [[23]]

## 
## [[24]]

## 
## [[25]]

## 
## [[26]]

## 
## [[27]]

## 
## [[28]]

## 
## [[29]]

## 
## [[30]]

## 
## [[31]]

## 
## [[32]]

## 
## [[33]]

## 
## [[34]]

## 
## [[35]]

## 
## [[36]]

## 
## [[37]]

## 
## [[38]]

## 
## [[39]]

## 
## [[40]]

# Mean hlifs per year and crop (cultura). Rows with a missing value in any of
# the three selected columns are dropped before averaging.
data_set |>
  select(cultura, year, hlifs) |>
  drop_na() |>
  group_by(year, cultura) |>
  summarise(
    hlifs = mean(hlifs),
    # Explicitly keep the default regrouping (result stays grouped by year)
    # instead of relying on the implicit behaviour and its message.
    .groups = "drop_last"
  )
## # A tibble: 10 × 3
## # Groups:   year [5]
##     year cultura         hlifs
##    <dbl> <chr>           <dbl>
##  1  2007 cana-de-acucar   205.
##  2  2008 cana-de-acucar  2215.
##  3  2017 cerrado        28019.
##  4  2017 eucalipto      38916.
##  5  2017 pinus          49523.
##  6  2017 silvipastoril  29867.
##  7  2018 pasto           2199.
##  8  2018 silvipastoril   8353.
##  9  2019 pasto           2482.
## 10  2019 silvipastoril  29675.