Skip to contents

Compares the column names of the two input data.frames, df1 and df2, position by position, and reports whether they are the same or not. This is useful before calling rbind(). If the data.frames have different numbers of columns, the function will create filler columns in the narrower one so that the comparison can still be made.

Usage

Column_Comparison(df1, df2, Summary_Message = TRUE)

Arguments

df1

Required - the first data.frame to be compared.

df2

Required - the second data.frame to be compared.

Summary_Message

Optional - default: TRUE. Either TRUE or FALSE; decides whether a summary message should be included.

Value

A comparison data.frame, listing the names of the columns from df1 and df2.

Examples

# Two example data.frames, breakfast_df and breakfast_df_nonstandard, are used
# to demonstrate how Column_Comparison compares column names.

breakfast_df
#>    food_code      food_name WATERg PROCNTg FAT_g_combined CHOAVLg
#> 1      F0001          Bacon     10      15             21      10
#> 2      F0002          Beans     15      10             12       1
#> 3      F0003          Toast     20      20             NA      24
#> 4      F0004       Mushroom     25      15             16      46
#> 5      F0005           Eggs     30      21             11      20
#> 6      F0006         Tomato     35      28             33       2
#> 7      F0007        Sausage     40      10             13      24
#> 8      F0008         Butter     NA      27             16      22
#> 9      F0009    Brown Sauce     NA      NA             NA      NA
#> 10     F0010 Tomato Ketchup     NA      NA             NA      NA
#>    FIBTGg_combined ALCg ASHg THIAmg THIAHCLmg RETOLmcg CARTBEQmcg_combined
#> 1               12   12   12     32        21       53                  NA
#> 2                3   43    3     90        45       12                  51
#> 3                8    8   28    130        20       20                  91
#> 4               15   15   15     21        69       NA                  22
#> 5                6    6    6   <NA>        42       62                  62
#> 6                2    2    2     61       150       40                 102
#> 7                9    9    9     21        23      140                  32
#> 8               13   13   13               30      210                  72
#> 9               NA   NA   NA     59        52       41                 112
#> 10              NA   NA   NA   <NA>        NA       NA                  NA
#>    NIAmg TRPmg NIAEQmg NIATRPmg FATg FAT_g FATCEg
#> 1  172.0 126.0    85.3     2.10   21  20.9     NA
#> 2     NA 142.5    49.2       NA   NA  12.0     NA
#> 3   24.4 142.7    86.4     2.38   NA    NA     NA
#> 4     NA 167.0    23.2     2.80   NA    NA   16.0
#> 5    8.1    NA    30.5       NA   11  10.9     NA
#> 6  134.6  76.3    83.3     1.27   NA  33.0   33.0
#> 7   10.2  98.6    16.6     1.64   13  12.1     NA
#> 8  187.9  41.4    84.5     0.69   16  16.1   15.9
#> 9   92.0 172.0    17.7     2.87   NA    NA     NA
#> 10    NA    NA      NA       NA   NA    NA     NA
#>                          comments
#> 1                                
#> 2  These are imaginary food items
#> 3                            <NA>
#> 4  With imaginary nutrient values
#> 5                                
#> 6                      And blanks
#> 7                            <NA>
#> 8       To test different outputs
#> 9                                
#> 10                  And scenarios
#
breakfast_df_nonstandard
#>    food_code      food_name Water_values_g PROCNT_values_g
#> 1      F0001          Bacon             10              15
#> 2      F0002          Beans             15              10
#> 3      F0003          Toast             20              20
#> 4      F0004       Mushroom             25              15
#> 5      F0005           Eggs             30              21
#> 6      F0006         Tomato             35              28
#> 7      F0007        Sausage             40              10
#> 8      F0008         Butter             NA              27
#> 9      F0009    Brown Sauce             NA              NA
#> 10     F0010 Tomato Ketchup             NA              NA
#>    FAT_values_g_combined CHOAVL_values_g FIBTG_values_g_combined ALC_values_g
#> 1                     21              10                      12           12
#> 2                     12               1                       3           43
#> 3                     NA              24                       8            8
#> 4                     16              46                      15           15
#> 5                     11              20                       6            6
#> 6                     33               2                       2            2
#> 7                     13              24                       9            9
#> 8                     16              22                      13           13
#> 9                     NA              NA                      NA           NA
#> 10                    NA              NA                      NA           NA
#>    ASH_values_g Thiamine_milligrams Thiamine_from_HCL_milligrams
#> 1            12                  32                           21
#> 2             3                  90                           45
#> 3            28                 130                           20
#> 4            15                  21                           69
#> 5             6                <NA>                           42
#> 6             2                  61                          150
#> 7             9                  21                           23
#> 8            13                                               30
#> 9            NA                  59                           52
#> 10           NA                <NA>                           NA
#>    Retinol_micrograms Beta_Carotene_Equivalents_micrograms Niacin_milligrams
#> 1                  53                                   NA             172.0
#> 2                  12                                   51                NA
#> 3                  20                                   91              24.4
#> 4                  NA                                   22                NA
#> 5                  62                                   62               8.1
#> 6                  40                                  102             134.6
#> 7                 140                                   32              10.2
#> 8                 210                                   72             187.9
#> 9                  41                                  112              92.0
#> 10                 NA                                   NA                NA
#>    Tryptophan_milligrams Niacin_eq_milligrams Niacine_from_TRP_mg FAT_in_g
#> 1                  126.0                 85.3                2.10       21
#> 2                  142.5                 49.2                  NA       NA
#> 3                  142.7                 86.4                2.38       NA
#> 4                  167.0                 23.2                2.80       NA
#> 5                     NA                 30.5                  NA       11
#> 6                   76.3                 83.3                1.27       NA
#> 7                   98.6                 16.6                1.64       13
#> 8                   41.4                 84.5                0.69       16
#> 9                  172.0                 17.7                2.87       NA
#> 10                    NA                   NA                  NA       NA
#>    FAT_unknown_calc_g FAT_continuous_extraction_g
#> 1                20.9                          NA
#> 2                12.0                          NA
#> 3                  NA                          NA
#> 4                  NA                        16.0
#> 5                10.9                          NA
#> 6                33.0                        33.0
#> 7                12.1                          NA
#> 8                16.1                        15.9
#> 9                  NA                          NA
#> 10                 NA                          NA
#>                   comments_column
#> 1                                
#> 2  These are imaginary food items
#> 3                            <NA>
#> 4  With imaginary nutrient values
#> 5                                
#> 6                      And blanks
#> 7                            <NA>
#> 8       To test different outputs
#> 9                                
#> 10                  And scenarios

# We will start with two data.frames whose column names do not line up.

Col_Comparison <- Column_Comparison(breakfast_df, breakfast_df_nonstandard)
#> ---------------------------
#> 
#> Number of times column names match:
#> 
#> FALSE  TRUE 
#>    19     2 
#> 
#> Columns do not match
#> 
#> ---------------------------

Col_Comparison
#>      From_breakfast_df        From_breakfast_df_nonstandard df1_equals_df2
#> 1            food_code                            food_code           TRUE
#> 2            food_name                            food_name           TRUE
#> 3               WATERg                       Water_values_g          FALSE
#> 4              PROCNTg                      PROCNT_values_g          FALSE
#> 5       FAT_g_combined                FAT_values_g_combined          FALSE
#> 6              CHOAVLg                      CHOAVL_values_g          FALSE
#> 7      FIBTGg_combined              FIBTG_values_g_combined          FALSE
#> 8                 ALCg                         ALC_values_g          FALSE
#> 9                 ASHg                         ASH_values_g          FALSE
#> 10              THIAmg                  Thiamine_milligrams          FALSE
#> 11           THIAHCLmg         Thiamine_from_HCL_milligrams          FALSE
#> 12            RETOLmcg                   Retinol_micrograms          FALSE
#> 13 CARTBEQmcg_combined Beta_Carotene_Equivalents_micrograms          FALSE
#> 14               NIAmg                    Niacin_milligrams          FALSE
#> 15               TRPmg                Tryptophan_milligrams          FALSE
#> 16             NIAEQmg                 Niacin_eq_milligrams          FALSE
#> 17            NIATRPmg                  Niacine_from_TRP_mg          FALSE
#> 18                FATg                             FAT_in_g          FALSE
#> 19               FAT_g                   FAT_unknown_calc_g          FALSE
#> 20              FATCEg          FAT_continuous_extraction_g          FALSE
#> 21            comments                      comments_column          FALSE

# Note how most of the columns do not match in their names, and the function
# tells you so.


# If the data.frames have different numbers of columns, then filler
# columns will be added to make up the difference.

breakfast_df_2 <- breakfast_df
breakfast_df_2$THIAmg <- NULL
breakfast_df_2$TRPmg <- NULL
breakfast_df_2$FAT_g <- NULL

Col_Comparison <- Column_Comparison(breakfast_df, breakfast_df_2)
#> breakfast_df is 3 columns wider than breakfast_df_2
#> ---------------------------
#> 
#> Number of times column names match:
#> 
#> FALSE  TRUE 
#>    12     9 
#> 
#> Columns do not match
#> 
#> ---------------------------

Col_Comparison
#>      From_breakfast_df From_breakfast_df_2 df1_equals_df2
#> 1            food_code           food_code           TRUE
#> 2            food_name           food_name           TRUE
#> 3               WATERg              WATERg           TRUE
#> 4              PROCNTg             PROCNTg           TRUE
#> 5       FAT_g_combined      FAT_g_combined           TRUE
#> 6              CHOAVLg             CHOAVLg           TRUE
#> 7      FIBTGg_combined     FIBTGg_combined           TRUE
#> 8                 ALCg                ALCg           TRUE
#> 9                 ASHg                ASHg           TRUE
#> 10              THIAmg           THIAHCLmg          FALSE
#> 11           THIAHCLmg            RETOLmcg          FALSE
#> 12            RETOLmcg CARTBEQmcg_combined          FALSE
#> 13 CARTBEQmcg_combined               NIAmg          FALSE
#> 14               NIAmg             NIAEQmg          FALSE
#> 15               TRPmg            NIATRPmg          FALSE
#> 16             NIAEQmg                FATg          FALSE
#> 17            NIATRPmg              FATCEg          FALSE
#> 18                FATg            comments          FALSE
#> 19               FAT_g        filler_col_1          FALSE
#> 20              FATCEg        filler_col_2          FALSE
#> 21            comments        filler_col_3          FALSE

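# Note how filler columns are added to make up the numbers - however, they are
# appended to the end of the data.frame and names are compared by position, so
# every column from the first removed one (THIAmg) onwards is shifted and
# reported as not matching.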

# If, however, the column names line up perfectly:

breakfast_df_copy <- breakfast_df

Col_Comparison <- Column_Comparison(breakfast_df, breakfast_df_copy)
#> ---------------------------
#> 
#> Number of times column names match:
#> 
#> TRUE 
#>   21 
#> 
#> Columns match
#> 
#> ---------------------------

Col_Comparison
#>      From_breakfast_df From_breakfast_df_copy df1_equals_df2
#> 1            food_code              food_code           TRUE
#> 2            food_name              food_name           TRUE
#> 3               WATERg                 WATERg           TRUE
#> 4              PROCNTg                PROCNTg           TRUE
#> 5       FAT_g_combined         FAT_g_combined           TRUE
#> 6              CHOAVLg                CHOAVLg           TRUE
#> 7      FIBTGg_combined        FIBTGg_combined           TRUE
#> 8                 ALCg                   ALCg           TRUE
#> 9                 ASHg                   ASHg           TRUE
#> 10              THIAmg                 THIAmg           TRUE
#> 11           THIAHCLmg              THIAHCLmg           TRUE
#> 12            RETOLmcg               RETOLmcg           TRUE
#> 13 CARTBEQmcg_combined    CARTBEQmcg_combined           TRUE
#> 14               NIAmg                  NIAmg           TRUE
#> 15               TRPmg                  TRPmg           TRUE
#> 16             NIAEQmg                NIAEQmg           TRUE
#> 17            NIATRPmg               NIATRPmg           TRUE
#> 18                FATg                   FATg           TRUE
#> 19               FAT_g                  FAT_g           TRUE
#> 20              FATCEg                 FATCEg           TRUE
#> 21            comments               comments           TRUE

# The output then shows that every column name matches. It is also possible
# to turn off the summary message:


Col_Comparison <- Column_Comparison(breakfast_df, breakfast_df_copy,
Summary_Message = FALSE)

Col_Comparison
#>      From_breakfast_df From_breakfast_df_copy df1_equals_df2
#> 1            food_code              food_code           TRUE
#> 2            food_name              food_name           TRUE
#> 3               WATERg                 WATERg           TRUE
#> 4              PROCNTg                PROCNTg           TRUE
#> 5       FAT_g_combined         FAT_g_combined           TRUE
#> 6              CHOAVLg                CHOAVLg           TRUE
#> 7      FIBTGg_combined        FIBTGg_combined           TRUE
#> 8                 ALCg                   ALCg           TRUE
#> 9                 ASHg                   ASHg           TRUE
#> 10              THIAmg                 THIAmg           TRUE
#> 11           THIAHCLmg              THIAHCLmg           TRUE
#> 12            RETOLmcg               RETOLmcg           TRUE
#> 13 CARTBEQmcg_combined    CARTBEQmcg_combined           TRUE
#> 14               NIAmg                  NIAmg           TRUE
#> 15               TRPmg                  TRPmg           TRUE
#> 16             NIAEQmg                NIAEQmg           TRUE
#> 17            NIATRPmg               NIATRPmg           TRUE
#> 18                FATg                   FATg           TRUE
#> 19               FAT_g                  FAT_g           TRUE
#> 20              FATCEg                 FATCEg           TRUE
#> 21            comments               comments           TRUE