Importação e Tratamento

Objetivo

Este script tem como objetivo realizar a importação dos dados brutos e aplicar os primeiros tratamentos necessários, como seleção de colunas relevantes, ajustes em nomes de variáveis, padronização de formatos e tipos de dados, além da criação de variáveis auxiliares que serão utilizadas nas análises subsequentes.

Carregando Pacotes

# Attach the packages used throughout this script.
library(tidyverse)
library(geobr)
library(ggpubr)
library(sf)
# source("../R/functions.R")
# Make the black-and-white theme the ggplot2 default for plots drawn below.
theme_set(theme_bw())

Importação de Dados

# Read the raw data set and apply the first round of cleaning: keep one soil
# depth, tidy the column set and names, then patch implausible values within
# each sampling date.
data_set <- read_rds("../data/data-set-fco2.rds") |>
  # Keep only the first depth layer (0-10 cm).
  filter(prof == "0-0.1") |>
  mutate(
    # Keep a single pair of coordinates, stored under the *_muni names.
    longitude_muni = long,
    latitude_muni = lat,
    across(c(manejo, tratamento), as_factor)
  ) |>
  rename(
    xco2 = xco2_detrend_5,
    xco2_trend = xco2_5,
    sif = sif_5,
    ph = p_h
  ) |>
  # Drop the variables that are not used downstream.
  select(
    !c(
      prof, long, lat, id, dist,
      estado, municipio, xco2_1, sif_1, xco2_detrend_1,
      data_preparo, conversao, cobertura, revolvimento_solo
    )
  ) |>
  relocate(data, year, month, cultura, x, y, longitude_muni, latitude_muni) |>
  # Every median below is computed within a single sampling date.
  group_by(data) |>
  # Step 1: values outside the accepted range are replaced by the median of
  # their sampling date. fco2 is patched in two passes, so the second median
  # already sees the result of the first.
  mutate(
    fco2 = ifelse(fco2 <= 0, median(fco2, na.rm = TRUE), fco2),
    fco2 = ifelse(fco2 > 20, median(fco2, na.rm = TRUE), fco2),
    ts = ifelse(ts > 40, median(ts, na.rm = TRUE), ts),
    across(
      c(macro, vtp, pla),
      \(v) ifelse(v <= 0, median(v, na.rm = TRUE), v)
    ),
    sb = ifelse(sb >= 150, median(sb, na.rm = TRUE), sb),
    mg = ifelse(mg >= 50, median(mg, na.rm = TRUE), mg),
    ca = ifelse(ca >= 80, median(ca, na.rm = TRUE), ca),
    p = ifelse(p >= 160, median(p, na.rm = TRUE), p)
  ) |>
  # Step 2: fix individual records and rescale texture values above 1000.
  # NOTE(review): 52 -> 5.2 and 34 -> 0.34 look like misplaced decimal points,
  # and the /10 like a unit mix-up -- confirm against the raw field sheets.
  mutate(
    ph = ifelse(ph == 52, 5.2, ph),
    k = ifelse(k == 34, 0.34, k),
    across(c(at, arg), \(v) ifelse(v > 1000, v / 10, v))
  ) |>
  # Step 3: silte is whatever remains of 1000 after arg and at.
  mutate(silte = 1000 - arg - at) |>
  # Step 4: the three texture fractions are set to missing for 2014.
  mutate(across(c(silte, arg, at), \(v) ifelse(year == 2014, NA, v))) |>
  # Step 5: porosity values below 1 are multiplied by 100
  # (presumably fractions being converted to percent -- TODO confirm).
  mutate(across(c(macro, micro, vtp), \(v) ifelse(v < 1, v * 100, v))) |>
  ungroup()

# Per-column overview of the cleaned data: types, missing counts and summary
# statistics.
skimr::skim(data_set)

Data summary
Name	data_set
Number of rows	14977
Number of columns	51
_______________________
Column type frequency:
character	2
factor	2
numeric	46
POSIXct	1
________________________
Group variables	None

Variable type: character

skim_variable	n_missing	complete_rate	min	max	empty	n_unique	whitespace
cultura	0	1	4	14	0	11	0
experimento	0	1	8	8	0	2	0

Variable type: factor

skim_variable	n_missing	complete_rate	ordered	n_unique	top_counts
manejo	0	1	FALSE	10	ref: 6072, can: 3984, con: 1484, pla: 840
tratamento	0	1	FALSE	21	CC: 3984, SI: 2492, EU: 2320, PD: 840

Variable type: numeric

skim_variable	n_missing	complete_rate	mean	sd	p0	p25	p50	p75	p100	hist
year	0	1.00	2012.66	5.35	2001.00	2008.00	2015.00	2017.00	2019.00	▂▂▁▆▇
month	0	1.00	7.25	2.79	1.00	6.00	7.00	10.00	12.00	▂▂▇▃▅
x	0	1.00	1408852.86	2962685.35	0.00	0.00	27.00	80.00	7749472.16	▇▁▁▁▂
y	0	1.00	295780.57	1258074.22	0.00	0.00	25.50	80.00	7630525.47	▇▁▁▁▁
longitude_muni	0	1.00	-50.39	1.76	-51.84	-51.84	-51.84	-48.20	-48.08	▇▁▁▁▅
latitude_muni	0	1.00	-20.67	0.54	-21.40	-21.35	-20.25	-20.25	-20.00	▅▁▁▁▇
fco2	110	0.99	2.76	2.01	0.02	1.29	2.14	3.75	18.40	▇▂▁▁▁
ts	317	0.98	21.77	5.90	1.00	19.40	22.60	26.20	39.70	▁▁▇▆▁
us	1754	0.88	16.12	8.80	0.00	10.00	14.00	22.00	89.00	▇▅▁▁▁
ph	2382	0.84	4.62	0.61	3.50	4.00	4.50	5.15	6.50	▇▆▆▃▁
mo	1355	0.91	22.18	12.32	1.49	15.00	23.00	29.00	61.26	▅▇▇▂▁
p	1355	0.91	19.38	20.21	1.00	6.00	15.00	27.00	151.00	▇▁▁▁▁
k	1348	0.91	2.46	2.15	0.04	1.00	1.70	3.45	12.50	▇▂▂▁▁
ca	1376	0.91	17.56	14.41	1.19	6.00	11.35	26.00	75.00	▇▃▂▁▁
mg	1376	0.91	10.37	5.25	0.42	7.00	10.00	13.20	34.00	▅▇▃▁▁
h_al	1362	0.91	48.24	28.80	0.00	28.00	42.29	72.00	121.00	▅▇▆▂▂
sb	1376	0.91	30.36	19.52	1.75	16.30	24.60	42.41	100.68	▇▆▃▂▁
ctc	1369	0.91	79.25	31.11	5.18	61.60	85.70	103.60	173.30	▂▃▇▃▁
v	1383	0.91	41.21	20.07	4.96	21.00	43.00	57.60	100.00	▇▆▇▅▁
ds	3284	0.78	1.39	0.17	0.88	1.24	1.38	1.52	1.86	▁▇▇▇▁
macro	3277	0.78	11.65	7.04	0.56	6.99	10.54	15.20	89.00	▇▂▁▁▁
micro	3298	0.78	34.54	6.75	7.00	31.50	35.63	39.12	52.42	▁▁▅▇▁
vtp	3298	0.78	46.01	7.27	15.00	40.81	46.25	51.34	87.80	▁▆▇▁▁
pla	3438	0.77	29.84	11.49	0.10	21.94	32.60	38.33	79.80	▂▅▇▁▁
at	8275	0.45	495.68	223.25	132.50	355.93	446.05	657.84	872.75	▇▇▅▇▆
silte	8282	0.45	100.64	69.49	1.25	50.86	77.87	138.20	305.00	▇▇▃▁▂
arg	8247	0.45	404.21	166.33	69.21	302.96	449.55	519.62	689.01	▅▂▅▇▅
hlifs	10452	0.30	14590.11	17253.55	158.39	1110.15	2409.80	29707.78	84692.90	▇▃▁▁▁
xco2_trend	7483	0.50	400.35	4.23	394.34	395.81	399.91	403.92	409.01	▇▅▅▆▂
xco2	7483	0.50	386.12	1.54	383.57	385.36	385.88	387.17	388.82	▇▇▇▇▇
sif	7483	0.50	0.67	0.32	0.29	0.38	0.57	0.86	1.42	▇▇▁▂▂
tmed	4103	0.73	23.79	4.35	12.60	21.60	24.50	27.20	31.50	▂▂▇▇▅
tmax	7963	0.47	32.13	4.32	20.70	30.02	32.60	35.10	40.40	▂▂▇▇▃
tmin	7963	0.47	19.04	4.01	5.10	16.10	19.80	22.40	25.90	▁▁▇▆▇
umed	4103	0.73	68.48	12.57	34.60	59.60	68.20	78.50	94.70	▂▅▇▇▃
umax	7963	0.47	93.56	6.71	71.60	90.50	95.70	99.40	100.00	▁▁▂▃▇
umin	7963	0.47	46.28	12.26	17.90	37.30	46.30	54.00	87.80	▂▇▇▂▁
pk_pa	4103	0.73	96.48	1.47	94.01	94.70	97.30	97.60	98.40	▆▂▁▇▆
rad	4422	0.70	16.14	4.85	3.50	12.70	15.80	20.50	27.90	▁▆▇▆▂
par	6891	0.54	284.33	142.31	8.80	233.70	294.40	388.60	515.50	▃▁▇▃▃
eto	4271	0.71	4.50	2.12	0.92	2.90	4.20	5.50	12.10	▇▇▃▂▁
velmax	7963	0.47	5.56	1.93	3.10	4.50	5.10	6.20	16.60	▇▃▁▁▁
velmin	7963	0.47	1.19	0.49	0.20	0.80	1.10	1.40	2.60	▂▇▅▂▁
dir_vel	7963	0.47	138.05	83.73	27.50	80.80	101.20	213.80	358.30	▇▇▁▃▂
chuva	4103	0.73	1.25	4.32	0.00	0.00	0.00	0.00	36.00	▇▁▁▁▁
inso	4103	0.73	6.57	2.80	0.00	4.80	6.80	8.70	11.10	▂▃▆▇▆

Variable type: POSIXct

skim_variable	n_missing	complete_rate	min	max	median	n_unique
data	0	1	2001-07-10	2019-12-01	2015-10-06	205

# Plot where the missing values fall across the columns of the data set.
visdat::vis_miss(data_set)

# Scatterplot matrices of fco2 and log(fco2) against one family of covariates
# at a time. Each quosure below is a column selection handed to select(); one
# matrix is drawn per entry, in this order.
list(
  quo(c(ts, us)),
  quo(ph:h_al),
  quo(sb:pla),
  quo(at:hlifs),
  quo(xco2_trend:umin),
  quo(pk_pa:inso)
) |>
  walk(\(covariates) {
    pairs_plot <- data_set |>
      mutate(fco2_log = log(fco2)) |>
      select(fco2, fco2_log, !!covariates) |>
      GGally::ggpairs()
    # Nothing is auto-printed inside a function body, so draw explicitly.
    print(pairs_plot)
  })

Histogramas

# Names of every column from fco2 through inso; one histogram is drawn for each.
my_vars <- data_set |>
  select(fco2:inso) |>
  names()

# Draw one histogram per variable, with the number of bins chosen by the
# Freedman-Diaconis rule on the non-missing values.
# walk() is used instead of map() because the plots are wanted only for their
# side effect: map() returned the list of ggplot objects, which was then
# auto-printed, drawing every histogram a second time under "[[n]]" headers.
walk(my_vars, \(var_name) {
  values <- na.omit(data_set[[var_name]])

  # nclass.FD() cannot handle an empty vector, so skip all-missing columns.
  if (length(values) == 0L) {
    return(invisible(NULL))
  }

  hist_plot <- data_set |>
    select(all_of(var_name)) |>
    drop_na() |>
    ggplot(aes(x = !!sym(var_name))) +
    geom_histogram(
      boundary = 0, color = "black", fill = "gray",
      bins = nclass.FD(values)
    )
  print(hist_plot)
  # print(summary(values))
})

## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

## 
## [[12]]

## 
## [[13]]

## 
## [[14]]

## 
## [[15]]

## 
## [[16]]

## 
## [[17]]

## 
## [[18]]

## 
## [[19]]

## 
## [[20]]

## 
## [[21]]

## 
## [[22]]

## 
## [[23]]

## 
## [[24]]

## 
## [[25]]

## 
## [[26]]

## 
## [[27]]

## 
## [[28]]

## 
## [[29]]

## 
## [[30]]

## 
## [[31]]

## 
## [[32]]

## 
## [[33]]

## 
## [[34]]

## 
## [[35]]

## 
## [[36]]

## 
## [[37]]

## 
## [[38]]

## 
## [[39]]

## 
## [[40]]

# Mean hlifs for each year and crop. Rows with a missing value in any of the
# three selected columns are discarded before averaging.
data_set |>
  select(cultura, year, hlifs) |>
  drop_na() |>
  group_by(year, cultura) |>
  # "drop_last" is what summarise() did implicitly (result stays grouped by
  # year); stating it keeps the output identical and silences the message.
  summarise(hlifs = mean(hlifs), .groups = "drop_last")

## # A tibble: 10 × 3
## # Groups:   year [5]
##     year cultura         hlifs
##    <dbl> <chr>           <dbl>
##  1  2007 cana-de-acucar   205.
##  2  2008 cana-de-acucar  2215.
##  3  2017 cerrado        28019.
##  4  2017 eucalipto      38916.
##  5  2017 pinus          49523.
##  6  2017 silvipastoril  29867.
##  7  2018 pasto           2199.
##  8  2018 silvipastoril   8353.
##  9  2019 pasto           2482.
## 10  2019 silvipastoril  29675.

Importação e Tratamento

Trevelim, LP; Panosso, AR

2025-04-22

Objetivo

Carregando Pacotes

Importação de Dados

Histogramas