group_by_column.awk (789B)
#!/bin/awk -f
# group_by_column: collapse comma-separated rows that share the same third
# column into a single output row per group.
#
# Backstory: the data provider delivered the data in an awkward format, so
# some rows had to be compressed before loading them into the database, to
# reduce table sizes and speed up JOINs. This step cut one table by ~60%,
# which gave a decent boost on the DB side.
# Also see: reorganize_groups.awk
#
# Input  (3 fields):  name,value,group
# Output (per group): group,value@name[,value@name...]
# Groups are printed in the order in which they first appear in the input,
# and the entries inside a group keep their input order.
BEGIN {
    FS = ","
    OFS = ","
    group_count = 0
}

# Rows without all three fields cannot be grouped; skip them instead of
# folding them into a bogus empty-key group. Blank lines are dropped
# silently, truncated rows are reported on stderr.
NF < 3 {
    if (NF > 0) {
        printf("group_by_column: line %d skipped: expected 3 fields, found %d\n", NR, NF) > "/dev/stderr"
    }
    next
}

{
    key = $3
    entry = $2 "@" $1

    # Use a membership test rather than the truthiness of the stored value.
    if (key in groups) {
        groups[key] = groups[key] OFS entry
    } else {
        # First entry for this group: remember when it was first seen,
        # because "for (k in arr)" iterates in an unspecified order.
        first_seen[++group_count] = key
        groups[key] = entry
    }
}

END {
    # Emit the groups in first-seen order so the output is deterministic.
    for (i = 1; i <= group_count; i++) {
        key = first_seen[i]
        print key, groups[key]
    }
}
#IN:
#foo,123,groupA
#bar,456,groupA
#baz,789,groupB
#OUT:
#groupA,123@foo,456@bar
#groupB,789@baz