import pandas as pd
import re
# Sample engine descriptions. Rows deliberately omit different attributes
# (horsepower, displacement, fuel, cylinder count) to exercise the parser.
data = dict(
    engine=[
        '150 HP 2000L Fuel: Petrol 4 cylinders',
        'Fuel: Diesel',
        '180 HP 2500L Fuel: Petrol 6 cylinders',
        '120 HP Fuel: Gasoline',
        '130 HP 1600L',
    ],
)

# Single-column frame holding the raw description strings.
df = pd.DataFrame(data)
# Output key -> pattern whose first capture group holds that attribute.
# Insertion order defines the key order of the returned dict.
_ENGINE_FIELD_PATTERNS = {
    'horse_power': re.compile(r'(\d+)\s*HP'),
    'engine_size': re.compile(r'(\d+)\s*L'),
    'fuel_type': re.compile(r'Fuel:\s*([A-Za-z]+)'),
    'cylinders': re.compile(r'(\d+)\s*cylinders'),
}


def extract_engine_info(engine):
    """Split a free-text engine description into its individual attributes.

    Every attribute is searched for independently, so the parts may appear in
    any order and any of them may be missing from the description.

    Args:
        engine: Description text such as
            ``'150 HP 2000L Fuel: Petrol 4 cylinders'``. A non-string value
            (for example ``None`` or ``NaN`` from an empty DataFrame cell) is
            treated as a description that contains no attributes.

    Returns:
        A dict with the keys ``'horse_power'``, ``'engine_size'``,
        ``'fuel_type'`` and ``'cylinders'``. Each value is the matched text
        as a ``str`` (numbers are not converted), or ``None`` when the
        attribute was not found.
    """
    # Missing cells are not strings; searching them would raise TypeError and
    # abort a whole ``Series.apply`` run, so report "nothing found" instead.
    if not isinstance(engine, str):
        return dict.fromkeys(_ENGINE_FIELD_PATTERNS)

    info = {}
    for field, pattern in _ENGINE_FIELD_PATTERNS.items():
        match = pattern.search(engine)
        info[field] = match.group(1) if match else None
    return info
# Parse every description; the result is a Series holding one dict per row.
engine_info = df['engine'].apply(extract_engine_info)

# Expand the dicts into a frame with one column per extracted attribute.
engine_df = pd.DataFrame(list(engine_info))

# Attach the parsed columns next to the original description column.
df = pd.concat((df, engine_df), axis='columns')

print(engine_df)
aW1wb3J0IHBhbmRhcyBhcyBwZAppbXBvcnQgcmUKCmRhdGEgPSB7CiAgICAnZW5naW5lJzogWwogICAgICAgICcxNTAgSFAgMjAwMEwgRnVlbDogUGV0cm9sIDQgY3lsaW5kZXJzJywKICAgICAgICAnRnVlbDogRGllc2VsJywKICAgICAgICAnMTgwIEhQIDI1MDBMIEZ1ZWw6IFBldHJvbCA2IGN5bGluZGVycycsCiAgICAgICAgJzEyMCBIUCBGdWVsOiBHYXNvbGluZScsCiAgICAgICAgJzEzMCBIUCAxNjAwTCcKICAgIF0KfQoKZGYgPSBwZC5EYXRhRnJhbWUoZGF0YSkKZGVmIGV4dHJhY3RfZW5naW5lX2luZm8oZW5naW5lKToKICAgIGhwID0gcmUuc2VhcmNoKHInKFxkKylccypIUCcsIGVuZ2luZSkKICAgIHNpemUgPSByZS5zZWFyY2gocicoXGQrKVxzKkwnLCBlbmdpbmUpCiAgICBmdWVsID0gcmUuc2VhcmNoKHInRnVlbDpccyooW0EtWmEtel0rKScsIGVuZ2luZSkKICAgIGN5bGluZGVycyA9IHJlLnNlYXJjaChyJyhcZCspXHMqY3lsaW5kZXJzJywgZW5naW5lKQogICAgCiAgICByZXR1cm4gewogICAgICAgICdob3JzZV9wb3dlcic6IGhwLmdyb3VwKDEpIGlmIGhwIGVsc2UgTm9uZSwKICAgICAgICAnZW5naW5lX3NpemUnOiBzaXplLmdyb3VwKDEpIGlmIHNpemUgZWxzZSBOb25lLAogICAgICAgICdmdWVsX3R5cGUnOiBmdWVsLmdyb3VwKDEpIGlmIGZ1ZWwgZWxzZSBOb25lLAogICAgICAgICdjeWxpbmRlcnMnOiBjeWxpbmRlcnMuZ3JvdXAoMSkgaWYgY3lsaW5kZXJzIGVsc2UgTm9uZQogICAgfQplbmdpbmVfaW5mbyA9IGRmWydlbmdpbmUnXS5hcHBseShleHRyYWN0X2VuZ2luZV9pbmZvKQplbmdpbmVfZGYgPSBwZC5EYXRhRnJhbWUoZW5naW5lX2luZm8udG9saXN0KCkpCmRmID0gcGQuY29uY2F0KFtkZiwgZW5naW5lX2RmXSwgYXhpcz0xKQoKcHJpbnQoZW5naW5lX2RmKQ==