This answer isn't necessarily better than what's already been posted, but as an illustration of how I approach problems like this it might be useful, especially if you're not used to working with Python's interactive interpreter.
I've started out knowing two things about this problem. First, I'm going to use itertools.groupby
to group the input into lists of data lines, one list for each individual data record. Second, I want to represent those records as dictionaries so that I can easily format the output.
One other thing that this shows is how using generators makes breaking a problem like this down into small parts easy.
>>> # first let's create some useful test data and put it into something
>>> # we can easily iterate over:
>>> data = """ID: 1
Name: X
FamilyN: Y
Age: 20
ID: 2
Name: H
FamilyN: F
Age: 23
ID: 3
Name: S
FamilyN: Y
Age: 13"""
>>> data = data.split("\n")
>>> # now we need a key function for itertools.groupby.
>>> # the key we'll be grouping by is, essentially, whether or not
>>> # the line is empty.
>>> # this will make groupby return groups whose key is True if we
>>> care about them.
>>> def is_data(line):
return True if line.strip() else False
>>> # make sure this really works
>>> "\n".join([line for line in data if is_data(line)])
'ID: 1\nName: X\nFamilyN: Y\nAge: 20\nID: 2\nName: H\nFamilyN: F\nAge: 23\nID: 3\nName: S\nFamilyN: Y\nAge: 13\nID: 4\nName: M\nFamilyN: Z\nAge: 25'
>>> # does groupby return what we expect?
>>> import itertools
>>> [list(value) for (key, value) in itertools.groupby(data, is_data) if key]
[['ID: 1', 'Name: X', 'FamilyN: Y', 'Age: 20'], ['ID: 2', 'Name: H', 'FamilyN: F', 'Age: 23'], ['ID: 3', 'Name: S', 'FamilyN: Y', 'Age: 13'], ['ID: 4', 'Name: M', 'FamilyN: Z', 'Age: 25']]
>>> # what we really want is for each item in the group to be a tuple
>>> # that's a key/value pair, so that we can easily create a dictionary
>>> # from each item.
>>> def make_key_value_pair(item):
items = item.split(":")
return (items[0].strip(), items[1].strip())
>>> make_key_value_pair("a: b")
('a', 'b')
>>> # let's test this:
>>> dict(make_key_value_pair(item) for item in ["a:1", "b:2", "c:3"])
{'a': '1', 'c': '3', 'b': '2'}
>>> # we could conceivably do all this in one line of code, but this
>>> # will be much more readable as a function:
>>> def get_data_as_dicts(data):
for (key, value) in itertools.groupby(data, is_data):
if key:
yield dict(make_key_value_pair(item) for item in value)
>>> list(get_data_as_dicts(data))
[{'FamilyN': 'Y', 'Age': '20', 'ID': '1', 'Name': 'X'}, {'FamilyN': 'F', 'Age': '23', 'ID': '2', 'Name': 'H'}, {'FamilyN': 'Y', 'Age': '13', 'ID': '3', 'Name': 'S'}, {'FamilyN': 'Z', 'Age': '25', 'ID': '4', 'Name': 'M'}]
>>> # now for an old trick: using a list of column names to drive the output.
>>> columns = ["Name", "FamilyN", "Age"]
>>> print "\n".join(" ".join(d[c] for c in columns) for d in get_data_as_dicts(data))
X Y 20
H F 23
S Y 13
M Z 25
>>> # okay, let's package this all into one function that takes a filename
>>> def get_formatted_data(filename):
with open(filename, "r") as f:
columns = ["Name", "FamilyN", "Age"]
for d in get_data_as_dicts(f):
yield " ".join(d[c] for c in columns)
>>> print "\n".join(get_formatted_data("c:\\temp\\test_data.txt"))
X Y 20
H F 23
S Y 13
M Z 25