python - split a row into columns - csv data

  • Last Update :
  • Techknowledgy :

You can create three separate lists, and then append to each using csv.reader.

import csv

# One list per CSV column; every row contributes one value to each list.
c1 = []
c2 = []
c3 = []
# newline='' lets the csv module handle line endings itself, as its
# documentation recommends for file objects handed to csv.reader.
with open('Half-life.csv', 'r', newline='') as f:
    reader = csv.reader(f, delimiter=',')
    # The loop has to stay inside the `with` block: the reader pulls lines
    # from the file lazily, and the file is closed once the block exits.
    for row in reader:
        # A row with fewer than three fields raises IndexError here.
        c1.append(row[0])
        c2.append(row[1])
        c3.append(row[2])

A little more automatic and flexible version of Alexander's answer:

import csv
from collections import defaultdict

# Maps column number -> list of that column's values, in row order.
# defaultdict(list) creates each column's list the first time it is seen,
# so rows of differing lengths are accepted.
columns = defaultdict(list)
with open('Half-life.csv', 'r', newline='') as f:
    reader = csv.reader(f, delimiter=',')
    # Iterate while the file is still open; the reader is lazy.
    for row in reader:
        for i, value in enumerate(row):
            columns[i].append(value)
# Following line is only necessary if you want a key error for invalid
# column numbers
columns = dict(columns)

You could also modify this to use column headers instead of column numbers.

import csv
from collections import defaultdict

# Maps column header -> list of that column's values, in row order.
columns = defaultdict(list)
with open('Half-life.csv', 'r', newline='') as f:
    reader = csv.reader(f, delimiter=',')
    # The first row supplies the column names. Defaulting to [] means an
    # empty file produces an empty result instead of raising StopIteration.
    headers = next(reader, [])
    column_nums = range(len(headers))  # Do NOT change to xrange
    for row in reader:
        # Index by position so that a row shorter than the header row still
        # raises IndexError rather than silently producing ragged columns.
        for i in column_nums:
            columns[headers[i]].append(row[i])
# Following line is only necessary if you want a key error for invalid
# column names
columns = dict(columns)

Another option: if you have numpy installed, you can use loadtxt to read a CSV file into a numpy array. You can then transpose the array if you want more columns than rows (I wasn't quite clear on how you wanted the data to look). For example:

import numpy as np

# Load data
data = np.loadtxt('csv_file.csv', delimiter=',')
# Transpose data if needs be
data = np.transpose(data)

Suggestion : 2

Split (reshape) CSV strings in columns into multiple rows, having one element per row (Reshaping and pivoting)

Example

import pandas as pd

# Sample frame: `var1` holds comma-separated strings that should become
# one row per element, with `var2` / `var3` repeated alongside.
df = pd.DataFrame([
    {'var1': 'a,b,c', 'var2': 1, 'var3': 'XX'},
    {'var1': 'd,e,f,x,y', 'var2': 2, 'var3': 'ZZ'},
])

print(df)

# Split each CSV string into a list, then explode the lists so every
# element gets its own row. The other columns are repeated automatically
# and the original column order is kept. reset_index(drop=True) replaces
# the repeated source-row labels with a fresh 0..n-1 index.
reshaped = (
    df.assign(var1=df['var1'].str.split(','))
      .explode('var1')
      .reset_index(drop=True)
)

print(reshaped)

Output:

        var1  var2 var3
0      a,b,c     1   XX
1  d,e,f,x,y     2   ZZ

  var1  var2 var3
0    a     1   XX
1    b     1   XX
2    c     1   XX
3    d     2   ZZ
4    e     2   ZZ
5    f     2   ZZ
6    x     2   ZZ
7    y     2   ZZ

Suggestion : 3

Split Column Delimiter: ':'. The number of columns in the new CSV file is fixed. Split Row Delimiter: ' O&-', where & can be only 'K' or 'Z'. (One team can contain many members; there is no upper limit.) The regex used doesn't hardcode any field; it allows spaces in names and surnames, capital Os in the notes, and ID fields that are not just 8-digit numbers.

Input data, shown as a table:

| Team   | Members |
|--------|---------|
| Team 1 | OK-10:Jason:Jones:ID No:00000000:male:my notes |
| Team 2 | OK-10:Mike:James:ID No:00000001:male:my notes OZ-09:John:Rick:ID No:00000002:male:my notes |
| Team 3 | OK-08:Michael:Knight:ID No:00000004:male:my notes2 OK-09:Helen:Rick:ID No:00000005:female:my notes3 OZ-10:Jane:James:ID No:00000034:female:my notes23 OK-09:Mary:Jane:ID No:00000023:female:my notes46 |

The same input as CSV:

"Team","Members"
Team 1,OK-10:Jason:Jones:ID No:00000000:male:my notes
Team 2,OK-10:Mike:James:ID No:00000001:male:my notes OZ-09:John:Rick:ID No:00000002:male:my notes
Team 3,OK-08:Michael:Knight:ID No:00000004:male:my notes2 OK-09:Helen:Rick:ID No:00000005:female:my notes3 OZ-10:Jane:James:ID No:00000034:female:my notes23 OK-09:Mary:Jane:ID No:00000023:female:my notes46
Desired output (first rows):

| Team  | Member_Rank | Member_Name | Member_Surname | Member_ID_Method | Member_ID_Num | Member_Gender | Member_Notes |
|-------|-------------|-------------|----------------|------------------|---------------|---------------|--------------|
| Team1 | OK-10       | Jason       | Jones          | ID No            | 00000000      | male          | my notes     |
| Team2 | OK-10       | Mike        | James          | ID No            | 00000001      | male          | my notes     |
| Team2 | OZ-09       | John        | Rick           | ID No            | 00000002      | male          | my notes     |

import csv
import re

# Matches one member record: a rank ("OK-" or "OZ-" followed by digits) and
# six further colon-separated fields of letters, digits and spaces. The
# trailing lookahead stops the last field just before the next " OK"/" OZ"
# record, or at the end of the string.
members_split_regex = re.compile(
    r'(O[KZ]-\d+):([a-zA-Z0-9 ]+):([a-zA-Z0-9 ]+):([a-zA-Z0-9 ]+)'
    r':([a-zA-Z0-9 ]+):([a-zA-Z0-9 ]+):([a-zA-Z0-9 ]+)(?= O[KZ]|$)'
)

# Output column names, in the same order as the regex capture groups.
member_fieldnames = [
    'Member_Rank',
    'Member_Name',
    'Member_Surname',
    'Member_ID_Method',
    'Member_ID_Num',
    'Member_Gender',
    'Member_Notes',
]

with open('test.csv', newline='') as input_file, \
        open('output_csv.csv', 'w', newline='') as output_file:
    csv_reader = csv.DictReader(input_file)
    # Keep every input column except the packed 'Members' column, which is
    # replaced by the individual member columns.
    fieldnames = csv_reader.fieldnames.copy()
    fieldnames.remove('Members')
    # extrasaction='ignore' drops the 'Members' key that row dictionaries
    # still carry when they are merged into the output record below.
    csv_writer = csv.DictWriter(
        output_file,
        extrasaction='ignore',
        fieldnames=fieldnames + member_fieldnames,
    )
    csv_writer.writeheader()
    for row in csv_reader:
        # One output row per member found in this team's 'Members' string.
        for member_tuple in members_split_regex.findall(row['Members']):
            member_dict = dict(zip(member_fieldnames, member_tuple))
            # Debug output: the raw input string and the parsed fields.
            print(row['Members'])
            print(member_tuple)
            member_dict.update(row)
            csv_writer.writerow(member_dict)
import csv
import re

# Matches one member record: a rank ("OK-" or "OZ-" followed by digits) and
# six further colon-separated fields of letters, digits and spaces. The
# trailing lookahead stops the last field just before the next " OK"/" OZ"
# record, or at the end of the string.
members_split_regex = re.compile(
    r'(O[KZ]-\d+):([a-zA-Z0-9 ]+):([a-zA-Z0-9 ]+):([a-zA-Z0-9 ]+)'
    r':([a-zA-Z0-9 ]+):([a-zA-Z0-9 ]+):([a-zA-Z0-9 ]+)(?= O[KZ]|$)'
)

# Output column names, in the same order as the regex capture groups.
member_fieldnames = [
    'Member_Rank',
    'Member_Name',
    'Member_Surname',
    'Member_ID_Method',
    'Member_ID_Num',
    'Member_Gender',
    'Member_Notes',
]

with open('test.csv', newline='') as input_file, \
        open('output_csv', 'w', newline='') as output_file:
    csv_reader = csv.DictReader(input_file)
    # Keep every input column except the packed 'Members' column, which is
    # replaced by the individual member columns.
    fieldnames = csv_reader.fieldnames.copy()
    fieldnames.remove('Members')
    # extrasaction='ignore' drops the 'Members' key that row dictionaries
    # still carry when they are merged into the output record below.
    csv_writer = csv.DictWriter(
        output_file,
        extrasaction='ignore',
        fieldnames=fieldnames + member_fieldnames,
    )
    csv_writer.writeheader()
    for row in csv_reader:
        # One output row per member found in this team's 'Members' string.
        for member_tuple in members_split_regex.findall(row['Members']):
            member_dict = dict(zip(member_fieldnames, member_tuple))
            member_dict.update(row)
            csv_writer.writerow(member_dict)