Compares the column names of the two input data.frames, df1 and df2, and displays whether they are the same or not.
Useful to run before an rbind call. If the two data.frames have different
numbers of columns, the function will add filler columns to the narrower one
so that the comparison can still be made.
Examples
# Two example data.frames are provided to demonstrate using
# Column_Comparison to compare column names.
breakfast_df
#> food_code food_name WATERg PROCNTg FAT_g_combined CHOAVLg
#> 1 F0001 Bacon 10 15 21 10
#> 2 F0002 Beans 15 10 12 1
#> 3 F0003 Toast 20 20 NA 24
#> 4 F0004 Mushroom 25 15 16 46
#> 5 F0005 Eggs 30 21 11 20
#> 6 F0006 Tomato 35 28 33 2
#> 7 F0007 Sausage 40 10 13 24
#> 8 F0008 Butter NA 27 16 22
#> 9 F0009 Brown Sauce NA NA NA NA
#> 10 F0010 Tomato Ketchup NA NA NA NA
#> FIBTGg_combined ALCg ASHg THIAmg THIAHCLmg RETOLmcg CARTBEQmcg_combined
#> 1 12 12 12 32 21 53 NA
#> 2 3 43 3 90 45 12 51
#> 3 8 8 28 130 20 20 91
#> 4 15 15 15 21 69 NA 22
#> 5 6 6 6 <NA> 42 62 62
#> 6 2 2 2 61 150 40 102
#> 7 9 9 9 21 23 140 32
#> 8 13 13 13 30 210 72
#> 9 NA NA NA 59 52 41 112
#> 10 NA NA NA <NA> NA NA NA
#> NIAmg TRPmg NIAEQmg NIATRPmg FATg FAT_g FATCEg
#> 1 172.0 126.0 85.3 2.10 21 20.9 NA
#> 2 NA 142.5 49.2 NA NA 12.0 NA
#> 3 24.4 142.7 86.4 2.38 NA NA NA
#> 4 NA 167.0 23.2 2.80 NA NA 16.0
#> 5 8.1 NA 30.5 NA 11 10.9 NA
#> 6 134.6 76.3 83.3 1.27 NA 33.0 33.0
#> 7 10.2 98.6 16.6 1.64 13 12.1 NA
#> 8 187.9 41.4 84.5 0.69 16 16.1 15.9
#> 9 92.0 172.0 17.7 2.87 NA NA NA
#> 10 NA NA NA NA NA NA NA
#> comments
#> 1
#> 2 These are imaginary food items
#> 3 <NA>
#> 4 With imaginary nutrient values
#> 5
#> 6 And blanks
#> 7 <NA>
#> 8 To test different outputs
#> 9
#> 10 And scenarios
#
breakfast_df_nonstandard
#> food_code food_name Water_values_g PROCNT_values_g
#> 1 F0001 Bacon 10 15
#> 2 F0002 Beans 15 10
#> 3 F0003 Toast 20 20
#> 4 F0004 Mushroom 25 15
#> 5 F0005 Eggs 30 21
#> 6 F0006 Tomato 35 28
#> 7 F0007 Sausage 40 10
#> 8 F0008 Butter NA 27
#> 9 F0009 Brown Sauce NA NA
#> 10 F0010 Tomato Ketchup NA NA
#> FAT_values_g_combined CHOAVL_values_g FIBTG_values_g_combined ALC_values_g
#> 1 21 10 12 12
#> 2 12 1 3 43
#> 3 NA 24 8 8
#> 4 16 46 15 15
#> 5 11 20 6 6
#> 6 33 2 2 2
#> 7 13 24 9 9
#> 8 16 22 13 13
#> 9 NA NA NA NA
#> 10 NA NA NA NA
#> ASH_values_g Thiamine_milligrams Thiamine_from_HCL_milligrams
#> 1 12 32 21
#> 2 3 90 45
#> 3 28 130 20
#> 4 15 21 69
#> 5 6 <NA> 42
#> 6 2 61 150
#> 7 9 21 23
#> 8 13 30
#> 9 NA 59 52
#> 10 NA <NA> NA
#> Retinol_micrograms Beta_Carotene_Equivalents_micrograms Niacin_milligrams
#> 1 53 NA 172.0
#> 2 12 51 NA
#> 3 20 91 24.4
#> 4 NA 22 NA
#> 5 62 62 8.1
#> 6 40 102 134.6
#> 7 140 32 10.2
#> 8 210 72 187.9
#> 9 41 112 92.0
#> 10 NA NA NA
#> Tryptophan_milligrams Niacin_eq_milligrams Niacine_from_TRP_mg FAT_in_g
#> 1 126.0 85.3 2.10 21
#> 2 142.5 49.2 NA NA
#> 3 142.7 86.4 2.38 NA
#> 4 167.0 23.2 2.80 NA
#> 5 NA 30.5 NA 11
#> 6 76.3 83.3 1.27 NA
#> 7 98.6 16.6 1.64 13
#> 8 41.4 84.5 0.69 16
#> 9 172.0 17.7 2.87 NA
#> 10 NA NA NA NA
#> FAT_unknown_calc_g FAT_continuous_extraction_g
#> 1 20.9 NA
#> 2 12.0 NA
#> 3 NA NA
#> 4 NA 16.0
#> 5 10.9 NA
#> 6 33.0 33.0
#> 7 12.1 NA
#> 8 16.1 15.9
#> 9 NA NA
#> 10 NA NA
#> comments_column
#> 1
#> 2 These are imaginary food items
#> 3 <NA>
#> 4 With imaginary nutrient values
#> 5
#> 6 And blanks
#> 7 <NA>
#> 8 To test different outputs
#> 9
#> 10 And scenarios
# We will start with two data.frames whose column names do not line up.
Col_Comparison <- Column_Comparison(breakfast_df, breakfast_df_nonstandard)
#> ---------------------------
#>
#> Number of times column names match:
#>
#> FALSE TRUE
#> 19 2
#>
#> Columns do not match
#>
#> ---------------------------
Col_Comparison
#> From_breakfast_df From_breakfast_df_nonstandard df1_equals_df2
#> 1 food_code food_code TRUE
#> 2 food_name food_name TRUE
#> 3 WATERg Water_values_g FALSE
#> 4 PROCNTg PROCNT_values_g FALSE
#> 5 FAT_g_combined FAT_values_g_combined FALSE
#> 6 CHOAVLg CHOAVL_values_g FALSE
#> 7 FIBTGg_combined FIBTG_values_g_combined FALSE
#> 8 ALCg ALC_values_g FALSE
#> 9 ASHg ASH_values_g FALSE
#> 10 THIAmg Thiamine_milligrams FALSE
#> 11 THIAHCLmg Thiamine_from_HCL_milligrams FALSE
#> 12 RETOLmcg Retinol_micrograms FALSE
#> 13 CARTBEQmcg_combined Beta_Carotene_Equivalents_micrograms FALSE
#> 14 NIAmg Niacin_milligrams FALSE
#> 15 TRPmg Tryptophan_milligrams FALSE
#> 16 NIAEQmg Niacin_eq_milligrams FALSE
#> 17 NIATRPmg Niacine_from_TRP_mg FALSE
#> 18 FATg FAT_in_g FALSE
#> 19 FAT_g FAT_unknown_calc_g FALSE
#> 20 FATCEg FAT_continuous_extraction_g FALSE
#> 21 comments comments_column FALSE
# Note how most of the columns do not match in their names, and the function
# tells you so.
# If the data.frames have different numbers of columns, then filler
# columns will be added to the narrower one.
breakfast_df_2 <- breakfast_df
breakfast_df_2$THIAmg <- NULL
breakfast_df_2$TRPmg <- NULL
breakfast_df_2$FAT_g <- NULL
Col_Comparison <- Column_Comparison(breakfast_df, breakfast_df_2)
#> breakfast_df is 3 columns wider than breakfast_df_2
#> ---------------------------
#>
#> Number of times column names match:
#>
#> FALSE TRUE
#> 12 9
#>
#> Columns do not match
#>
#> ---------------------------
Col_Comparison
#> From_breakfast_df From_breakfast_df_2 df1_equals_df2
#> 1 food_code food_code TRUE
#> 2 food_name food_name TRUE
#> 3 WATERg WATERg TRUE
#> 4 PROCNTg PROCNTg TRUE
#> 5 FAT_g_combined FAT_g_combined TRUE
#> 6 CHOAVLg CHOAVLg TRUE
#> 7 FIBTGg_combined FIBTGg_combined TRUE
#> 8 ALCg ALCg TRUE
#> 9 ASHg ASHg TRUE
#> 10 THIAmg THIAHCLmg FALSE
#> 11 THIAHCLmg RETOLmcg FALSE
#> 12 RETOLmcg CARTBEQmcg_combined FALSE
#> 13 CARTBEQmcg_combined NIAmg FALSE
#> 14 NIAmg NIAEQmg FALSE
#> 15 TRPmg NIATRPmg FALSE
#> 16 NIAEQmg FATg FALSE
#> 17 NIATRPmg FATCEg FALSE
#> 18 FATg comments FALSE
#> 19 FAT_g filler_col_1 FALSE
#> 20 FATCEg filler_col_2 FALSE
#> 21 comments filler_col_3 FALSE
# Note how filler columns are added to make up the numbers - however, these
# columns are added to the end of the narrower data.frame, so every column
# after the first missing one is shifted and no longer lines up.
# If the columns line up perfectly however:
breakfast_df_copy <- breakfast_df
Col_Comparison <- Column_Comparison(breakfast_df, breakfast_df_copy)
#> ---------------------------
#>
#> Number of times column names match:
#>
#> TRUE
#> 21
#>
#> Columns match
#>
#> ---------------------------
Col_Comparison
#> From_breakfast_df From_breakfast_df_copy df1_equals_df2
#> 1 food_code food_code TRUE
#> 2 food_name food_name TRUE
#> 3 WATERg WATERg TRUE
#> 4 PROCNTg PROCNTg TRUE
#> 5 FAT_g_combined FAT_g_combined TRUE
#> 6 CHOAVLg CHOAVLg TRUE
#> 7 FIBTGg_combined FIBTGg_combined TRUE
#> 8 ALCg ALCg TRUE
#> 9 ASHg ASHg TRUE
#> 10 THIAmg THIAmg TRUE
#> 11 THIAHCLmg THIAHCLmg TRUE
#> 12 RETOLmcg RETOLmcg TRUE
#> 13 CARTBEQmcg_combined CARTBEQmcg_combined TRUE
#> 14 NIAmg NIAmg TRUE
#> 15 TRPmg TRPmg TRUE
#> 16 NIAEQmg NIAEQmg TRUE
#> 17 NIATRPmg NIATRPmg TRUE
#> 18 FATg FATg TRUE
#> 19 FAT_g FAT_g TRUE
#> 20 FATCEg FATCEg TRUE
#> 21 comments comments TRUE
# Then the output shows that every column matches. It is possible to turn
# off the summary message, however, by setting Summary_Message = FALSE:
Col_Comparison <- Column_Comparison(breakfast_df, breakfast_df_copy,
Summary_Message = FALSE)
Col_Comparison
#> From_breakfast_df From_breakfast_df_copy df1_equals_df2
#> 1 food_code food_code TRUE
#> 2 food_name food_name TRUE
#> 3 WATERg WATERg TRUE
#> 4 PROCNTg PROCNTg TRUE
#> 5 FAT_g_combined FAT_g_combined TRUE
#> 6 CHOAVLg CHOAVLg TRUE
#> 7 FIBTGg_combined FIBTGg_combined TRUE
#> 8 ALCg ALCg TRUE
#> 9 ASHg ASHg TRUE
#> 10 THIAmg THIAmg TRUE
#> 11 THIAHCLmg THIAHCLmg TRUE
#> 12 RETOLmcg RETOLmcg TRUE
#> 13 CARTBEQmcg_combined CARTBEQmcg_combined TRUE
#> 14 NIAmg NIAmg TRUE
#> 15 TRPmg TRPmg TRUE
#> 16 NIAEQmg NIAEQmg TRUE
#> 17 NIATRPmg NIATRPmg TRUE
#> 18 FATg FATg TRUE
#> 19 FAT_g FAT_g TRUE
#> 20 FATCEg FATCEg TRUE
#> 21 comments comments TRUE