Load Data
Show Code
# Read every raw input table from DATA_PATH.

# Dates table
dates_orig <- load_dates_data(DATA_PATH)

# Mote location table
motes_orig <- load_mote_location_data(DATA_PATH)

# Redwood data, loaded once per `source` option ("all", "net", "log")
redwood_all_orig <- load_redwood_data(DATA_PATH, source = "all")
redwood_net_orig <- load_redwood_data(DATA_PATH, source = "net")
redwood_log_orig <- load_redwood_data(DATA_PATH, source = "log")
Quick Look
Show Code
# A tibble: 13,000 × 3
number date day
<int> <chr> <dbl>
1 1 Tue Apr 27 17:10:00 2004 12536.
2 2 Tue Apr 27 17:15:00 2004 12536.
3 3 Tue Apr 27 17:20:00 2004 12536.
4 4 Tue Apr 27 17:25:00 2004 12536.
5 5 Tue Apr 27 17:30:00 2004 12536.
6 6 Tue Apr 27 17:35:00 2004 12536.
7 7 Tue Apr 27 17:40:00 2004 12536.
8 8 Tue Apr 27 17:45:00 2004 12536.
9 9 Tue Apr 27 17:50:00 2004 12536.
10 10 Tue Apr 27 17:55:00 2004 12536.
# ℹ 12,990 more rows
Show Code
Data summary
Name
dates_orig
Number of rows
13000
Number of columns
3
_______________________
Column type frequency:
character
1
numeric
2
________________________
Group variables
None
Variable type: character
Variable type: numeric
number
0
1
6500.50
3752.92
1.00
3250.75
6500.50
9750.25
13000.00
▇▇▇▇▇
day
0
1
12558.57
13.03
12536.01
12547.29
12558.57
12569.86
12581.14
▇▇▇▇▇
Show Code
# A tibble: 80 × 5
ID Height Direc Dist Tree
<int> <dbl> <chr> <dbl> <chr>
1 24 10.5 WSW 0.1 edge
2 20 12.7 WSW 0.1 edge
3 27 14.9 WSW 0.1 edge
4 38 16.6 WSW 0.1 edge
5 0 16.9 NW 0.1 edge
6 67 19.7 SW 0.1 edge
7 55 21.5 SW 0.1 edge
8 114 23.3 SW 0.1 edge
9 198 24.8 NW 0.1 edge
10 111 26 WSW 0.1 edge
# ℹ 70 more rows
Show Code
Data summary
Name
motes_orig
Number of rows
80
Number of columns
5
_______________________
Column type frequency:
character
2
numeric
3
________________________
Group variables
None
Variable type: character
Direc
0
1
1
3
0
9
0
Tree
0
1
4
8
0
2
0
Variable type: numeric
ID
0
1
84.92
53.31
0.0
40.75
79.0
124.0
200.0
▇▇▆▆▂
Height
0
1
42.32
13.79
10.5
33.27
44.9
52.1
66.5
▂▃▆▇▃
Dist
0
1
0.82
1.34
0.1
0.10
0.1
1.0
5.0
▇▁▂▁▁
Show Code
# A tibble: 416,036 × 11
result_time epoch nodeid parent voltage depth humidity humid_temp
<dttm> <int> <int> <int> <dbl> <int> <dbl> <dbl>
1 2004-05-07 18:24:58 2812 119 5 220 2 95.4 12.7
2 2004-05-07 18:24:58 2812 105 129 223 3 97.0 12.6
3 2004-05-07 18:24:59 2812 113 118 222 4 94.5 12.5
4 2004-05-07 18:24:59 2812 138 5 223 2 96.9 12.7
5 2004-05-07 18:24:59 2812 127 42 222 3 97.8 12.4
6 2004-05-07 18:29:58 2813 74 5 220 2 96.1 12.9
7 2004-05-07 18:29:58 2813 197 110 219 3 97.4 12.2
8 2004-05-07 18:29:59 2813 77 3 222 3 96.3 12.4
9 2004-05-07 18:29:59 2813 138 5 224 2 98.2 12.5
10 2004-05-07 18:29:59 2813 113 118 224 4 96.3 12.4
# ℹ 416,026 more rows
# ℹ 3 more variables: humid_adj <dbl>, hamatop <dbl>, hamabot <dbl>
Show Code
# Summary statistics for the raw "all"-source redwood table.
redwood_all_orig |>
  skimr::skim()
Data summary
Name
redwood_all_orig
Number of rows
416036
Number of columns
11
_______________________
Column type frequency:
numeric
10
POSIXct
1
________________________
Group variables
None
Variable type: numeric
epoch
0
1.00
4219.64
2921.56
2.00
1625.00
3817.00
6589.00
12635.00
▇▅▅▃▁
nodeid
0
1.00
93.61
113.84
2.00
55.00
110.00
127.00
65535.00
▇▁▁▁▁
parent
0
1.00
1726.73
10196.59
0.00
42.00
118.00
140.00
65535.00
▇▁▁▁▁
voltage
0
1.00
82.73
176.41
0.01
2.65
2.75
214.00
1023.00
▇▃▁▁▁
depth
0
1.00
29.50
64.76
0.00
2.00
3.00
7.00
255.00
▇▁▁▁▁
humidity
12532
0.97
64.35
29.12
-9375.37
44.48
64.80
83.64
114.89
▁▁▁▁▇
humid_temp
12532
0.97
14.82
7.08
-38.40
10.56
14.10
17.94
603.84
▇▁▁▁▁
humid_adj
12532
0.97
62.36
25.90
-6334.83
43.88
63.02
80.43
147.69
▁▁▁▁▇
hamatop
12532
0.97
11049.06
43271.28
0.00
0.00
0.00
7250.00
22592200.00
▇▁▁▁▁
hamabot
12532
0.97
252.78
1090.41
0.00
0.00
0.00
0.00
465820.00
▇▁▁▁▁
Variable type: POSIXct
result_time
0
1
2004-05-07 18:24:58
2004-11-10 14:25:00
2004-11-10 14:25:00
114981
Show Code
# A tibble: 114,980 × 11
result_time epoch nodeid parent voltage depth humidity humid_temp
<dttm> <int> <int> <int> <int> <int> <dbl> <dbl>
1 2004-05-07 18:24:58 2812 119 5 220 2 95.4 12.7
2 2004-05-07 18:24:58 2812 105 129 223 3 97.0 12.6
3 2004-05-07 18:24:59 2812 113 118 222 4 94.5 12.5
4 2004-05-07 18:24:59 2812 138 5 223 2 96.9 12.7
5 2004-05-07 18:24:59 2812 127 42 222 3 97.8 12.4
6 2004-05-07 18:29:58 2813 74 5 220 2 96.1 12.9
7 2004-05-07 18:29:58 2813 197 110 219 3 97.4 12.2
8 2004-05-07 18:29:59 2813 77 3 222 3 96.3 12.4
9 2004-05-07 18:29:59 2813 138 5 224 2 98.2 12.5
10 2004-05-07 18:29:59 2813 113 118 224 4 96.3 12.4
# ℹ 114,970 more rows
# ℹ 3 more variables: humid_adj <dbl>, hamatop <dbl>, hamabot <dbl>
Show Code
# Summary statistics for the raw "net"-source redwood table.
redwood_net_orig |>
  skimr::skim()
Data summary
Name
redwood_net_orig
Number of rows
114980
Number of columns
11
_______________________
Column type frequency:
numeric
10
POSIXct
1
________________________
Group variables
None
Variable type: numeric
epoch
0
1.00
6567.79
2120.35
2812.00
4751.75
6567.00
8374.00
10288.00
▇▇▇▇▇
nodeid
0
1.00
95.02
51.56
3.00
59.00
110.00
127.00
198.00
▆▇▇▇▂
parent
0
1.00
126.19
1352.45
0.00
5.00
118.00
129.00
65535.00
▇▁▁▁▁
voltage
0
1.00
292.79
227.22
198.00
218.00
223.00
227.00
1023.00
▇▁▁▁▁
depth
0
1.00
2.46
5.31
1.00
2.00
2.00
3.00
255.00
▇▁▁▁▁
humidity
4262
0.96
72.12
21.33
-4.00
57.10
72.05
92.61
114.89
▁▂▇▇▆
humid_temp
4262
0.96
14.28
9.84
6.58
10.12
12.98
16.09
122.15
▇▁▁▁▁
humid_adj
4262
0.96
69.79
20.08
-3.03
55.79
69.77
89.05
147.69
▁▅▇▅▁
hamatop
4262
0.96
11521.65
24962.82
0.00
0.00
0.00
8436.36
113376.00
▇▁▁▁▁
hamabot
4262
0.96
271.95
805.31
0.00
0.00
0.00
0.00
9480.77
▇▁▁▁▁
Variable type: POSIXct
result_time
0
1
2004-05-07 18:24:58
2004-06-02 17:27:48
2004-05-20 19:21:21
114980
Show Code
# A tibble: 301,056 × 11
result_time epoch nodeid parent voltage depth humidity humid_temp
<dttm> <int> <int> <int> <dbl> <int> <dbl> <dbl>
1 2004-11-10 14:25:00 3 2 65535 2.88 255 43.8 22.4
2 2004-11-10 14:25:00 4 2 65535 2.88 255 44.8 22.2
3 2004-11-10 14:25:00 5 2 65535 2.88 255 45.8 22.1
4 2004-11-10 14:25:00 6 2 65535 2.88 255 46.4 22.0
5 2004-11-10 14:25:00 7 2 65535 2.88 255 46.0 22.0
6 2004-11-10 14:25:00 8 2 65535 2.88 255 46.3 22.0
7 2004-11-10 14:25:00 9 2 65535 2.88 255 46.8 22.0
8 2004-11-10 14:25:00 10 2 60 2.88 8 47.3 22.1
9 2004-11-10 14:25:00 11 2 15 2.88 4 48.1 22.2
10 2004-11-10 14:25:00 12 2 15 2.88 4 49.0 22.3
# ℹ 301,046 more rows
# ℹ 3 more variables: humid_adj <dbl>, hamatop <dbl>, hamabot <dbl>
Show Code
# Summary statistics for the raw "log"-source redwood table.
redwood_log_orig |>
  skimr::skim()
Data summary
Name
redwood_log_orig
Number of rows
301056
Number of columns
11
_______________________
Column type frequency:
numeric
10
POSIXct
1
________________________
Group variables
None
Variable type: numeric
epoch
0
1.00
3322.83
2677.35
2.00
1166.00
2420.00
5249.00
12635.00
▇▃▃▁▁
nodeid
0
1.00
93.08
129.97
2.00
49.00
105.00
127.00
65535.00
▇▁▁▁▁
parent
0
1.00
2338.01
11900.79
0.00
46.00
118.00
141.00
65535.00
▇▁▁▁▁
voltage
0
1.00
2.51
0.65
0.01
2.63
2.70
2.78
3.03
▁▁▁▁▇
depth
0
1.00
39.83
73.47
0.00
2.00
3.00
19.00
255.00
▇▁▁▁▁
humidity
8270
0.97
61.41
31.07
-9375.37
40.03
61.58
80.20
104.40
▁▁▁▁▇
humid_temp
8270
0.97
15.02
5.69
-38.40
10.86
14.72
18.81
603.84
▇▁▁▁▁
humid_adj
8270
0.97
59.55
27.27
-6334.83
39.53
60.00
77.22
100.22
▁▁▁▁▇
hamatop
8270
0.97
10870.34
48422.14
0.00
0.00
0.00
6762.33
22592200.00
▇▁▁▁▁
hamabot
8270
0.97
245.54
1180.34
0.00
0.00
0.00
0.00
465820.00
▇▁▁▁▁
Variable type: POSIXct
result_time
0
1
2004-11-10 14:25:00
2004-11-10 14:25:00
2004-11-10 14:25:00
1
Clean Data
Show Code
# TODO: fill out cleaning functions in clean.R

# Run each raw table through its matching cleaning function.

# Dates table
dates_df <- clean_dates_data(dates_orig)

# Mote location table
motes_df <- clean_mote_location_data(motes_orig)

# Redwood tables: the same cleaner is applied to each source variant
redwood_all_df <- clean_redwood_data(redwood_all_orig)
redwood_net_df <- clean_redwood_data(redwood_net_orig)
redwood_log_df <- clean_redwood_data(redwood_log_orig)
Merge Data
Show Code
# Merge the cleaned dates, mote-location, network ("net"), and log tables
# into a single data frame.
redwood_df <- merge_redwood_data(
  dates_data = dates_df,
  motes_data = motes_df,
  redwood_net_data = redwood_net_df,
  redwood_log_data = redwood_log_df
)

# Preview only the first 1000 merged rows, passed to vthemes::pretty_DT().
redwood_df |>
  dplyr::slice_head(n = 1000) |>
  vthemes::pretty_DT()
Show Code
Data summary
Name
redwood_df
Number of rows
310258
Number of columns
22
_______________________
Column type frequency:
character
5
Date
1
factor
1
numeric
12
POSIXct
2
Timespan
1
________________________
Group variables
None
Variable type: character
source
0
1.00
3
3
0
2
0
time_chr
0
1.00
8
8
0
288
0
date_chr
0
1.00
15
15
0
45
0
Direc
6085
0.98
1
3
0
9
0
Tree
6085
0.98
4
8
0
2
0
Variable type: Date
date
0
1
2004-04-27
2004-06-10
2004-05-07
45
Variable type: factor
day_of_week
0
1
FALSE
7
Wed: 52681, Thu: 46702, Tue: 44570, Fri: 42824
Variable type: numeric
epoch
0
1.00
3712.24
2999.23
2.00
1236.00
2685.00
5969.75
12635.00
▇▃▃▂▁
nodeid
0
1.00
91.83
128.43
2.00
46.00
105.00
127.00
65535.00
▇▁▁▁▁
parent
0
1.00
2194.41
11527.89
0.00
44.00
118.00
140.00
65535.00
▇▁▁▁▁
voltage
0
1.00
27.96
111.66
0.01
2.64
2.71
2.80
1023.00
▇▁▁▁▁
depth
0
1.00
36.93
71.19
0.00
2.00
3.00
15.00
255.00
▇▁▁▁▁
humidity
0
1.00
61.44
30.82
-9375.37
40.13
61.25
80.57
114.89
▁▁▁▁▇
temp
0
1.00
15.37
7.55
-38.40
10.97
14.85
19.08
603.84
▇▁▁▁▁
iPAR
0
1.00
10709.38
47276.00
0.00
0.00
0.00
6614.04
22592200.00
▇▁▁▁▁
rPAR
0
1.00
231.61
1143.68
0.00
0.00
0.00
0.00
465820.00
▇▁▁▁▁
day
0
1.00
12548.89
10.41
12536.01
12540.30
12545.33
12556.73
12579.88
▇▃▃▂▁
Height
6085
0.98
47.74
12.04
10.50
40.30
49.60
56.10
66.50
▁▃▃▇▆
Dist
6085
0.98
0.99
1.47
0.10
0.10
0.10
1.00
5.00
▇▁▁▁▁
Variable type: POSIXct
result_time
0
1
2004-05-07 18:24:59
2004-11-10 14:25:00
2004-11-10 14:25:00
25646
datetime
0
1
2004-04-27 17:15:00
2004-06-10 14:00:00
2004-05-07 00:50:00
12634
Variable type: Timespan