Module 8: Data Wrangling - Chris Okitondo

Setup

Load needed packages.

Data loading

# Project-relative path to the raw data file.
# Note: the file carries an .Rda extension but is read with readRDS() below.
data_location <- here::here("fluanalysis", "data", "SympAct_Any_Pos.Rda")
# Read the stored object into `data`
data <- readRDS(file = data_location)

Looking at the data

# Compact overview: dimensions plus one line per column (type and first values)
dplyr::glimpse(data)
Rows: 735
Columns: 63
$ DxName1           <fct> "Influenza like illness - Clinical Dx", "Acute tonsi…
$ DxName2           <fct> NA, "Influenza like illness - Clinical Dx", "Acute p…
$ DxName3           <fct> NA, NA, NA, NA, NA, NA, NA, NA, "Fever, unspecified"…
$ DxName4           <fct> NA, NA, NA, NA, NA, NA, NA, NA, "Other fatigue", NA,…
$ DxName5           <fct> NA, NA, NA, NA, NA, NA, NA, NA, "Headache", NA, NA, …
$ Unique.Visit      <chr> "340_17632125", "340_17794836", "342_17737773", "342…
$ ActivityLevel     <int> 10, 6, 2, 2, 5, 3, 4, 0, 0, 5, 9, 1, 3, 6, 5, 2, 2, …
$ ActivityLevelF    <fct> 10, 6, 2, 2, 5, 3, 4, 0, 0, 5, 9, 1, 3, 6, 5, 2, 2, …
$ SwollenLymphNodes <fct> Yes, Yes, Yes, Yes, Yes, No, No, No, Yes, No, Yes, Y…
$ ChestCongestion   <fct> No, Yes, Yes, Yes, No, No, No, Yes, Yes, Yes, Yes, Y…
$ ChillsSweats      <fct> No, No, Yes, Yes, Yes, Yes, Yes, Yes, Yes, No, Yes, …
$ NasalCongestion   <fct> No, Yes, Yes, Yes, No, No, No, Yes, Yes, Yes, Yes, Y…
$ CoughYN           <fct> Yes, Yes, No, Yes, No, Yes, Yes, Yes, Yes, Yes, No, …
$ Sneeze            <fct> No, No, Yes, Yes, No, Yes, No, Yes, No, No, No, No, …
$ Fatigue           <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Ye…
$ SubjectiveFever   <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, No, Yes…
$ Headache          <fct> Yes, Yes, Yes, Yes, Yes, Yes, No, Yes, Yes, Yes, Yes…
$ Weakness          <fct> Mild, Severe, Severe, Severe, Moderate, Moderate, Mi…
$ WeaknessYN        <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Ye…
$ CoughIntensity    <fct> Severe, Severe, Mild, Moderate, None, Moderate, Seve…
$ CoughYN2          <fct> Yes, Yes, Yes, Yes, No, Yes, Yes, Yes, Yes, Yes, Yes…
$ Myalgia           <fct> Mild, Severe, Severe, Severe, Mild, Moderate, Mild, …
$ MyalgiaYN         <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Ye…
$ RunnyNose         <fct> No, No, Yes, Yes, No, No, Yes, Yes, Yes, Yes, No, No…
$ AbPain            <fct> No, No, Yes, No, No, No, No, No, No, No, Yes, Yes, N…
$ ChestPain         <fct> No, No, Yes, No, No, Yes, Yes, No, No, No, No, Yes, …
$ Diarrhea          <fct> No, No, No, No, No, Yes, No, No, No, No, No, No, No,…
$ EyePn             <fct> No, No, No, No, Yes, No, No, No, No, No, Yes, No, Ye…
$ Insomnia          <fct> No, No, Yes, Yes, Yes, No, No, Yes, Yes, Yes, Yes, Y…
$ ItchyEye          <fct> No, No, No, No, No, No, No, No, No, No, No, No, Yes,…
$ Nausea            <fct> No, No, Yes, Yes, Yes, Yes, No, No, Yes, Yes, Yes, Y…
$ EarPn             <fct> No, Yes, No, Yes, No, No, No, No, No, No, No, Yes, Y…
$ Hearing           <fct> No, Yes, No, No, No, No, No, No, No, No, No, No, No,…
$ Pharyngitis       <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, No, No, No, Yes, …
$ Breathless        <fct> No, No, Yes, No, No, Yes, No, No, No, Yes, No, Yes, …
$ ToothPn           <fct> No, No, Yes, No, No, No, No, No, Yes, No, No, Yes, N…
$ Vision            <fct> No, No, No, No, No, No, No, No, No, No, No, No, No, …
$ Vomit             <fct> No, No, No, No, No, No, Yes, No, No, No, Yes, Yes, N…
$ Wheeze            <fct> No, No, No, Yes, No, Yes, No, No, No, No, No, Yes, N…
$ BodyTemp          <dbl> 98.3, 100.4, 100.8, 98.8, 100.5, 98.4, 102.5, 98.4, …
$ RapidFluA         <fct> Presumptive Negative For Influenza A, NA, Presumptiv…
$ RapidFluB         <fct> Presumptive Negative For Influenza B, NA, Presumptiv…
$ PCRFluA           <fct> NA, NA, NA, NA, NA, NA,  Influenza A Not Detected, N…
$ PCRFluB           <fct> NA, NA, NA, NA, NA, NA,  Influenza B Not Detected, N…
$ TransScore1       <dbl> 1, 3, 4, 5, 0, 2, 2, 5, 4, 4, 2, 3, 2, 5, 3, 5, 1, 5…
$ TransScore1F      <fct> 1, 3, 4, 5, 0, 2, 2, 5, 4, 4, 2, 3, 2, 5, 3, 5, 1, 5…
$ TransScore2       <dbl> 1, 2, 3, 4, 0, 2, 2, 4, 3, 3, 1, 2, 2, 4, 2, 4, 1, 4…
$ TransScore2F      <fct> 1, 2, 3, 4, 0, 2, 2, 4, 3, 3, 1, 2, 2, 4, 2, 4, 1, 4…
$ TransScore3       <dbl> 1, 1, 2, 3, 0, 2, 2, 3, 2, 2, 0, 1, 1, 3, 1, 3, 1, 3…
$ TransScore3F      <fct> 1, 1, 2, 3, 0, 2, 2, 3, 2, 2, 0, 1, 1, 3, 1, 3, 1, 3…
$ TransScore4       <dbl> 0, 2, 4, 4, 0, 1, 1, 4, 3, 3, 2, 2, 2, 4, 3, 4, 0, 4…
$ TransScore4F      <fct> 0, 2, 4, 4, 0, 1, 1, 4, 3, 3, 2, 2, 2, 4, 3, 4, 0, 4…
$ ImpactScore       <int> 7, 8, 14, 12, 11, 12, 8, 7, 10, 7, 13, 17, 11, 13, 9…
$ ImpactScore2      <int> 6, 7, 13, 11, 10, 11, 7, 6, 9, 6, 12, 16, 10, 12, 8,…
$ ImpactScore3      <int> 3, 4, 9, 7, 6, 7, 3, 3, 6, 4, 7, 11, 6, 8, 4, 4, 5, …
$ ImpactScoreF      <fct> 7, 8, 14, 12, 11, 12, 8, 7, 10, 7, 13, 17, 11, 13, 9…
$ ImpactScore2F     <fct> 6, 7, 13, 11, 10, 11, 7, 6, 9, 6, 12, 16, 10, 12, 8,…
$ ImpactScore3F     <fct> 3, 4, 9, 7, 6, 7, 3, 3, 6, 4, 7, 11, 6, 8, 4, 4, 5, …
$ ImpactScoreFD     <fct> 7, 8, 14, 12, 11, 12, 8, 7, 10, 7, 13, 17, 11, 13, 9…
$ TotalSymp1        <dbl> 8, 11, 18, 17, 11, 14, 10, 12, 14, 11, 15, 20, 13, 1…
$ TotalSymp1F       <fct> 8, 11, 18, 17, 11, 14, 10, 12, 14, 11, 15, 20, 13, 1…
$ TotalSymp2        <dbl> 8, 10, 17, 16, 11, 14, 10, 11, 13, 10, 14, 19, 13, 1…
$ TotalSymp3        <dbl> 8, 9, 16, 15, 11, 14, 10, 10, 12, 9, 13, 18, 12, 16,…
# Per-column summaries: level counts for factors, quantiles/mean for numeric
# columns, and NA counts where values are missing
summary(data)
                                           DxName1   
 Influenza like illness - Clinical Dx          :328  
 Influenza - Virus Identified                  :131  
 Fever, unspecified                            :101  
 Cough                                         : 66  
 Acute pharyngitis, unspecified                : 50  
 Acute upper respiratory infection, unspecified: 22  
 (Other)                                       : 37  
                                 DxName2   
 Influenza - Virus Identified        :126  
 Influenza like illness - Clinical Dx:115  
 Fever, unspecified                  : 45  
 Cough                               : 41  
 Acute pharyngitis, unspecified      : 31  
 (Other)                             : 97  
 NA's                                :280  
                                 DxName3   
 Influenza - Virus Identified        : 23  
 Influenza like illness - Clinical Dx: 14  
 Cough                               : 10  
 Fever, unspecified                  :  6  
 Acute pharyngitis, unspecified      :  4  
 (Other)                             : 52  
 NA's                                :626  
                                           DxName4   
 Influenza - Virus Identified                  :  3  
 Acute upper respiratory infection, unspecified:  2  
 Encounter for immunization                    :  2  
 Influenza like illness - Clinical Dx          :  2  
 Acute pharyngitis, unspecified                :  1  
 (Other)                                       :  9  
 NA's                                          :716  
                                                                                               DxName5   
 Acute suppurative otitis media without spontaneous rupture of ear drum, right ear                 :  0  
 Encounter for immunization                                                                        :  0  
 Headache                                                                                          :  1  
 Other infectious mononucleosis without complication                                               :  0  
 Strain of other flexor muscle, fascia and tendon at forearm level, right arm, subsequent encounter:  0  
 NA's                                                                                              :734  
                                                                                                         
 Unique.Visit       ActivityLevel    ActivityLevelF SwollenLymphNodes
 Length:735         Min.   : 0.000   3      :125    No :421          
 Class :character   1st Qu.: 3.000   5      : 97    Yes:314          
 Mode  :character   Median : 4.000   4      : 95                     
                    Mean   : 4.463   2      : 80                     
                    3rd Qu.: 6.000   7      : 68                     
                    Max.   :10.000   6      : 66                     
                                     (Other):204                     
 ChestCongestion ChillsSweats NasalCongestion CoughYN   Sneeze    Fatigue  
 No :326         No :131      No :170         No : 75   No :340   No : 64  
 Yes:409         Yes:604      Yes:565         Yes:660   Yes:395   Yes:671  
                                                                           
                                                                           
                                                                           
                                                                           
                                                                           
 SubjectiveFever Headache      Weakness   WeaknessYN  CoughIntensity CoughYN2 
 No :230         No :115   None    : 49   No : 49    None    : 47    No : 47  
 Yes:505         Yes:620   Mild    :224   Yes:686    Mild    :156    Yes:688  
                           Moderate:341              Moderate:360             
                           Severe  :121              Severe  :172             
                                                                              
                                                                              
                                                                              
     Myalgia    MyalgiaYN RunnyNose AbPain    ChestPain Diarrhea  EyePn    
 None    : 79   No : 79   No :211   No :642   No :501   No :636   No :622  
 Mild    :214   Yes:656   Yes:524   Yes: 93   Yes:234   Yes: 99   Yes:113  
 Moderate:327                                                              
 Severe  :115                                                              
                                                                           
                                                                           
                                                                           
 Insomnia  ItchyEye  Nausea    EarPn     Hearing   Pharyngitis Breathless
 No :316   No :553   No :477   No :573   No :705   No :121     No :438   
 Yes:419   Yes:182   Yes:258   Yes:162   Yes: 30   Yes:614     Yes:297   
                                                                         
                                                                         
                                                                         
                                                                         
                                                                         
 ToothPn   Vision    Vomit     Wheeze       BodyTemp     
 No :569   No :716   No :656   No :514   Min.   : 97.20  
 Yes:166   Yes: 19   Yes: 79   Yes:221   1st Qu.: 98.20  
                                         Median : 98.50  
                                         Mean   : 98.94  
                                         3rd Qu.: 99.30  
                                         Max.   :103.10  
                                         NA's   :5       
                                RapidFluA  
 Positive for Influenza A            :169  
 Presumptive Negative For Influenza A:159  
 NA's                                :407  
                                           
                                           
                                           
                                           
                                RapidFluB                        PCRFluA   
 Positive for Influenza B            : 26    Influenza A Detected    :120  
 Presumptive Negative For Influenza B:302    Influenza A Not Detected: 33  
 NA's                                :407   Assay Invalid            :  0  
                                            Indeterminate            :  1  
                                            NA's                     :581  
                                                                           
                                                                           
                      PCRFluB     TransScore1    TransScore1F  TransScore2   
  Influenza B Detected    :  9   Min.   :0.000   0: 13        Min.   :0.000  
  Influenza B Not Detected:145   1st Qu.:3.000   1: 53        1st Qu.:2.000  
 Assay Invalid            :  0   Median :4.000   2:107        Median :3.000  
 NA's                     :581   Mean   :3.473   3:157        Mean   :2.917  
                                 3rd Qu.:5.000   4:210        3rd Qu.:4.000  
                                 Max.   :5.000   5:195        Max.   :4.000  
                                                                             
 TransScore2F  TransScore3    TransScore3F  TransScore4    TransScore4F
 0: 13        Min.   :0.000   0: 24        Min.   :0.000   0: 50       
 1: 89        1st Qu.:1.000   1:166        1st Qu.:2.000   1:103       
 2:138        Median :2.000   2:222        Median :3.000   2:154       
 3:201        Mean   :2.148   3:323        Mean   :2.576   3:230       
 4:294        3rd Qu.:3.000                3rd Qu.:4.000   4:198       
              Max.   :3.000                Max.   :4.000               
                                                                       
  ImpactScore      ImpactScore2     ImpactScore3    ImpactScoreF ImpactScore2F
 Min.   : 2.000   Min.   : 2.000   Min.   : 0.00   8      :105   7      :107  
 1st Qu.: 8.000   1st Qu.: 7.000   1st Qu.: 3.00   9      :104   8      :102  
 Median : 9.000   Median : 8.000   Median : 5.00   10     : 88   9      : 90  
 Mean   : 9.514   Mean   : 8.581   Mean   : 5.06   7      : 84   10     : 86  
 3rd Qu.:11.000   3rd Qu.:10.000   3rd Qu.: 7.00   11     : 82   6      : 85  
 Max.   :18.000   Max.   :17.000   Max.   :13.00   12     : 58   11     : 59  
                                                   (Other):214   (Other):206  
 ImpactScore3F ImpactScoreFD   TotalSymp1     TotalSymp1F    TotalSymp2   
 4      :134   8      :105   Min.   : 5.00   12     : 86   Min.   : 4.00  
 5      :112   9      :104   1st Qu.:11.00   13     : 84   1st Qu.:10.00  
 3      :108   10     : 88   Median :13.00   14     : 80   Median :12.00  
 6      :102   7      : 84   Mean   :12.99   11     : 72   Mean   :12.43  
 7      : 66   11     : 82   3rd Qu.:15.00   10     : 62   3rd Qu.:15.00  
 2      : 64   12     : 58   Max.   :23.00   15     : 61   Max.   :22.00  
 (Other):149   (Other):214                   (Other):290                  
   TotalSymp3   
 Min.   : 3.00  
 1st Qu.:10.00  
 Median :12.00  
 Mean   :11.66  
 3rd Qu.:14.00  
 Max.   :21.00  
                
# Print the first six rows to see what individual records look like
head(data)
                               DxName1
1 Influenza like illness - Clinical Dx
2       Acute tonsillitis, unspecified
3 Influenza like illness - Clinical Dx
4 Influenza like illness - Clinical Dx
5       Acute pharyngitis, unspecified
6 Influenza like illness - Clinical Dx
                                       DxName2 DxName3 DxName4 DxName5
1                                         <NA>    <NA>    <NA>    <NA>
2         Influenza like illness - Clinical Dx    <NA>    <NA>    <NA>
3               Acute pharyngitis, unspecified    <NA>    <NA>    <NA>
4 Unspecified asthma with (acute) exacerbation    <NA>    <NA>    <NA>
5         Influenza like illness - Clinical Dx    <NA>    <NA>    <NA>
6                                         <NA>    <NA>    <NA>    <NA>
  Unique.Visit ActivityLevel ActivityLevelF SwollenLymphNodes ChestCongestion
1 340_17632125            10             10               Yes              No
2 340_17794836             6              6               Yes             Yes
3 342_17737773             2              2               Yes             Yes
4 342_17806002             2              2               Yes             Yes
5 342_17610918             5              5               Yes              No
6 343_17543967             3              3                No              No
  ChillsSweats NasalCongestion CoughYN Sneeze Fatigue SubjectiveFever Headache
1           No              No     Yes     No     Yes             Yes      Yes
2           No             Yes     Yes     No     Yes             Yes      Yes
3          Yes             Yes      No    Yes     Yes             Yes      Yes
4          Yes             Yes     Yes    Yes     Yes             Yes      Yes
5          Yes              No      No     No     Yes             Yes      Yes
6          Yes              No     Yes    Yes     Yes             Yes      Yes
  Weakness WeaknessYN CoughIntensity CoughYN2  Myalgia MyalgiaYN RunnyNose
1     Mild        Yes         Severe      Yes     Mild       Yes        No
2   Severe        Yes         Severe      Yes   Severe       Yes        No
3   Severe        Yes           Mild      Yes   Severe       Yes       Yes
4   Severe        Yes       Moderate      Yes   Severe       Yes       Yes
5 Moderate        Yes           None       No     Mild       Yes        No
6 Moderate        Yes       Moderate      Yes Moderate       Yes        No
  AbPain ChestPain Diarrhea EyePn Insomnia ItchyEye Nausea EarPn Hearing
1     No        No       No    No       No       No     No    No      No
2     No        No       No    No       No       No     No   Yes     Yes
3    Yes       Yes       No    No      Yes       No    Yes    No      No
4     No        No       No    No      Yes       No    Yes   Yes      No
5     No        No       No   Yes      Yes       No    Yes    No      No
6     No       Yes      Yes    No       No       No    Yes    No      No
  Pharyngitis Breathless ToothPn Vision Vomit Wheeze BodyTemp
1         Yes         No      No     No    No     No     98.3
2         Yes         No      No     No    No     No    100.4
3         Yes        Yes     Yes     No    No     No    100.8
4         Yes         No      No     No    No    Yes     98.8
5         Yes         No      No     No    No     No    100.5
6         Yes        Yes      No     No    No    Yes     98.4
                             RapidFluA                            RapidFluB
1 Presumptive Negative For Influenza A Presumptive Negative For Influenza B
2                                 <NA>                                 <NA>
3 Presumptive Negative For Influenza A Presumptive Negative For Influenza B
4 Presumptive Negative For Influenza A Presumptive Negative For Influenza B
5                                 <NA>                                 <NA>
6                                 <NA>                                 <NA>
  PCRFluA PCRFluB TransScore1 TransScore1F TransScore2 TransScore2F TransScore3
1    <NA>    <NA>           1            1           1            1           1
2    <NA>    <NA>           3            3           2            2           1
3    <NA>    <NA>           4            4           3            3           2
4    <NA>    <NA>           5            5           4            4           3
5    <NA>    <NA>           0            0           0            0           0
6    <NA>    <NA>           2            2           2            2           2
  TransScore3F TransScore4 TransScore4F ImpactScore ImpactScore2 ImpactScore3
1            1           0            0           7            6            3
2            1           2            2           8            7            4
3            2           4            4          14           13            9
4            3           4            4          12           11            7
5            0           0            0          11           10            6
6            2           1            1          12           11            7
  ImpactScoreF ImpactScore2F ImpactScore3F ImpactScoreFD TotalSymp1 TotalSymp1F
1            7             6             3             7          8           8
2            8             7             4             8         11          11
3           14            13             9            14         18          18
4           12            11             7            12         17          17
5           11            10             6            11         11          11
6           12            11             7            12         14          14
  TotalSymp2 TotalSymp3
1          8          8
2         10          9
3         17         16
4         16         15
5         11         11
6         14         14
# Summary grouped by column type, including missing counts and completeness
# rates for every variable
skimr::skim(data)
Data summary
Name data
Number of rows 735
Number of columns 63
_______________________
Column type frequency:
character 1
factor 50
numeric 12
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
Unique.Visit 0 1 10 12 0 735 0

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
DxName1 0 1.00 FALSE 25 Inf: 328, Inf: 131, Fev: 101, Cou: 66
DxName2 280 0.62 FALSE 42 Inf: 126, Inf: 115, Fev: 45, Cou: 41
DxName3 626 0.15 FALSE 37 Inf: 23, Inf: 14, Cou: 10, Fev: 6
DxName4 716 0.03 FALSE 14 Inf: 3, Acu: 2, Enc: 2, Inf: 2
DxName5 734 0.00 FALSE 1 Hea: 1, Acu: 0, Enc: 0, Oth: 0
ActivityLevelF 0 1.00 FALSE 11 3: 125, 5: 97, 4: 95, 2: 80
SwollenLymphNodes 0 1.00 FALSE 2 No: 421, Yes: 314
ChestCongestion 0 1.00 FALSE 2 Yes: 409, No: 326
ChillsSweats 0 1.00 FALSE 2 Yes: 604, No: 131
NasalCongestion 0 1.00 FALSE 2 Yes: 565, No: 170
CoughYN 0 1.00 FALSE 2 Yes: 660, No: 75
Sneeze 0 1.00 FALSE 2 Yes: 395, No: 340
Fatigue 0 1.00 FALSE 2 Yes: 671, No: 64
SubjectiveFever 0 1.00 FALSE 2 Yes: 505, No: 230
Headache 0 1.00 FALSE 2 Yes: 620, No: 115
Weakness 0 1.00 FALSE 4 Mod: 341, Mil: 224, Sev: 121, Non: 49
WeaknessYN 0 1.00 FALSE 2 Yes: 686, No: 49
CoughIntensity 0 1.00 FALSE 4 Mod: 360, Sev: 172, Mil: 156, Non: 47
CoughYN2 0 1.00 FALSE 2 Yes: 688, No: 47
Myalgia 0 1.00 FALSE 4 Mod: 327, Mil: 214, Sev: 115, Non: 79
MyalgiaYN 0 1.00 FALSE 2 Yes: 656, No: 79
RunnyNose 0 1.00 FALSE 2 Yes: 524, No: 211
AbPain 0 1.00 FALSE 2 No: 642, Yes: 93
ChestPain 0 1.00 FALSE 2 No: 501, Yes: 234
Diarrhea 0 1.00 FALSE 2 No: 636, Yes: 99
EyePn 0 1.00 FALSE 2 No: 622, Yes: 113
Insomnia 0 1.00 FALSE 2 Yes: 419, No: 316
ItchyEye 0 1.00 FALSE 2 No: 553, Yes: 182
Nausea 0 1.00 FALSE 2 No: 477, Yes: 258
EarPn 0 1.00 FALSE 2 No: 573, Yes: 162
Hearing 0 1.00 FALSE 2 No: 705, Yes: 30
Pharyngitis 0 1.00 FALSE 2 Yes: 614, No: 121
Breathless 0 1.00 FALSE 2 No: 438, Yes: 297
ToothPn 0 1.00 FALSE 2 No: 569, Yes: 166
Vision 0 1.00 FALSE 2 No: 716, Yes: 19
Vomit 0 1.00 FALSE 2 No: 656, Yes: 79
Wheeze 0 1.00 FALSE 2 No: 514, Yes: 221
RapidFluA 407 0.45 FALSE 2 Pos: 169, Pre: 159
RapidFluB 407 0.45 FALSE 2 Pre: 302, Pos: 26
PCRFluA 581 0.21 FALSE 3 In: 120, In: 33, Ind: 1, Ass: 0
PCRFluB 581 0.21 FALSE 2 In: 145, In: 9, Ass: 0
TransScore1F 0 1.00 FALSE 6 4: 210, 5: 195, 3: 157, 2: 107
TransScore2F 0 1.00 FALSE 5 4: 294, 3: 201, 2: 138, 1: 89
TransScore3F 0 1.00 FALSE 4 3: 323, 2: 222, 1: 166, 0: 24
TransScore4F 0 1.00 FALSE 5 3: 230, 4: 198, 2: 154, 1: 103
ImpactScoreF 0 1.00 FALSE 17 8: 105, 9: 104, 10: 88, 7: 84
ImpactScore2F 0 1.00 FALSE 16 7: 107, 8: 102, 9: 90, 10: 86
ImpactScore3F 0 1.00 FALSE 14 4: 134, 5: 112, 3: 108, 6: 102
ImpactScoreFD 0 1.00 FALSE 17 8: 105, 9: 104, 10: 88, 7: 84
TotalSymp1F 0 1.00 FALSE 19 12: 86, 13: 84, 14: 80, 11: 72

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
ActivityLevel 0 1.00 4.46 2.64 0.0 3.0 4.0 6.0 10.0 ▆▇▆▅▂
BodyTemp 5 0.99 98.94 1.20 97.2 98.2 98.5 99.3 103.1 ▇▇▂▁▁
TransScore1 0 1.00 3.47 1.31 0.0 3.0 4.0 5.0 5.0 ▂▅▆▇▇
TransScore2 0 1.00 2.92 1.11 0.0 2.0 3.0 4.0 4.0 ▁▂▃▆▇
TransScore3 0 1.00 2.15 0.88 0.0 1.0 2.0 3.0 3.0 ▁▅▁▆▇
TransScore4 0 1.00 2.58 1.21 0.0 2.0 3.0 4.0 4.0 ▂▃▆▇▇
ImpactScore 0 1.00 9.51 2.84 2.0 8.0 9.0 11.0 18.0 ▂▇▇▅▁
ImpactScore2 0 1.00 8.58 2.78 2.0 7.0 8.0 10.0 17.0 ▂▇▆▃▁
ImpactScore3 0 1.00 5.06 2.34 0.0 3.0 5.0 7.0 13.0 ▂▇▃▂▁
TotalSymp1 0 1.00 12.99 3.41 5.0 11.0 13.0 15.0 23.0 ▂▇▇▅▁
TotalSymp2 0 1.00 12.43 3.22 4.0 10.0 12.0 15.0 22.0 ▁▇▇▅▁
TotalSymp3 0 1.00 11.66 3.10 3.0 10.0 12.0 14.0 21.0 ▁▇▇▅▁

Checking variable names

# List all column names; used to decide which name patterns to drop below
names(data)
 [1] "DxName1"           "DxName2"           "DxName3"          
 [4] "DxName4"           "DxName5"           "Unique.Visit"     
 [7] "ActivityLevel"     "ActivityLevelF"    "SwollenLymphNodes"
[10] "ChestCongestion"   "ChillsSweats"      "NasalCongestion"  
[13] "CoughYN"           "Sneeze"            "Fatigue"          
[16] "SubjectiveFever"   "Headache"          "Weakness"         
[19] "WeaknessYN"        "CoughIntensity"    "CoughYN2"         
[22] "Myalgia"           "MyalgiaYN"         "RunnyNose"        
[25] "AbPain"            "ChestPain"         "Diarrhea"         
[28] "EyePn"             "Insomnia"          "ItchyEye"         
[31] "Nausea"            "EarPn"             "Hearing"          
[34] "Pharyngitis"       "Breathless"        "ToothPn"          
[37] "Vision"            "Vomit"             "Wheeze"           
[40] "BodyTemp"          "RapidFluA"         "RapidFluB"        
[43] "PCRFluA"           "PCRFluB"           "TransScore1"      
[46] "TransScore1F"      "TransScore2"       "TransScore2F"     
[49] "TransScore3"       "TransScore3F"      "TransScore4"      
[52] "TransScore4F"      "ImpactScore"       "ImpactScore2"     
[55] "ImpactScore3"      "ImpactScoreF"      "ImpactScore2F"    
[58] "ImpactScore3F"     "ImpactScoreFD"     "TotalSymp1"       
[61] "TotalSymp1F"       "TotalSymp2"        "TotalSymp3"       

Step 1: Removing all variables that have Score or Total or FluA or FluB or Dxname or Activity in their name

# Drop every column whose name contains one of the listed fragments.
# matches() ignores case by default, which is why "Dxname" also removes
# the DxName1-DxName5 columns.
data1 <- dplyr::select(
  data,
  -dplyr::matches("Score|Total|FluA|FluB|Dxname|Activity")
)

Step 2: Removing the variable Unique.Visit.

# Remove the visit identifier by name rather than by position, so this step
# keeps working (or fails loudly) if the column order or the pattern used in
# the previous step ever changes.
data2 <- dplyr::select(data1, -Unique.Visit)

Step 3: Removing all NA observations in the dataset

# Keep only rows with no missing value in any remaining column
data3 <- na.omit(data2)

Naming the newly processed data as cleaned_data

# Give the fully processed data frame a descriptive name before saving
cleaned_data <- data3

Saving the cleaned data

# Project-relative output path for the cleaned data
save_data_location <- here::here("fluanalysis", "data", "cleaned_data.rds")
# Write the cleaned data frame to that path as an RDS file
saveRDS(cleaned_data, file = save_data_location)

Addition for Module 11: Preparing data for Machine Learning

Loading the previous cleaned data for additional cleaning

# Reload the cleaned data written earlier in this document.
# here::here() is namespace-qualified, as in the earlier chunks, so this line
# does not depend on the here package being attached.
cleaned_data <- readRDS(
  here::here("fluanalysis", "data", "cleaned_data.rds")
)

We are removing redundant variables: the Yes/No versions of symptoms that are also recorded on a severity scale.

# Drop the Yes/No versions of symptoms that are also recorded on a severity
# scale (Weakness, CoughIntensity, Myalgia).
# select() is namespace-qualified, as in step 1, so another attached package
# that exports a select() function cannot mask it.
cleaned_data_v2 <- dplyr::select(
  cleaned_data,
  -CoughYN, -WeaknessYN, -CoughYN2, -MyalgiaYN
)

We are getting rid of the Hearing and Vision variables since each has fewer than 50 entries in one of its two categories (Yes/No).

# Drop the two binary predictors with a very sparse "Yes" category
# (Vision: 19, Hearing: 30 in the raw-data summary).
# select() is namespace-qualified, as in step 1, to avoid masking.
cleaned_data_v3 <- dplyr::select(cleaned_data_v2, -Vision, -Hearing)

Our data now consists of 730 observations and 26 variables. This is what we will use for Module 11.

# Save the machine-learning-ready data set for Module 11.
# here::here() is namespace-qualified, as in the earlier chunks, so this line
# does not depend on the here package being attached.
saveRDS(
  cleaned_data_v3,
  file = here::here("fluanalysis", "data", "cleaned_data_mod11.rds")
)