#!/usr/bin/env python
# coding: utf-8
# File: max_min.py
# # Margins Min-Max (Cervix cancer data)
#
# Python code to extract the maximum and minimum values from motion data of MR-Linac patients.
# Margins are drawn around key structures, including CTV, GTV, and organs at risk.
# This is for pelvic MR-Linac only - but can be adapted towards other margins projects.
#
# ## File structure:
# patient MRI data is stored in csv, by week and imaging type.
# prior Python script has already done a comparison between margins of various weeks
#
# ## Comparisons:
# Intrafraction = within same week, comparing motion of scans over time in same day
# Interfraction = same patient but across different weeks
#
# ## Sections
# Part 1 - importing libraries, declaring all functions to read files for each patient
# Part 2 - defining target folders, parameters and regions of interest
# Part 3 - running the script
#
# # Part 1:
# In[1]:
import pandas as pd
import os
import csv
import re
def splitter_base_comparison(comparison_scan, original_df, roi):
    """Split the data for one ROI into base-scan rows and comparison-scan rows.

    Arguments:
    comparison_scan = comparison week scan in pandas dataFrame
    original_df = full original csv in pandas dataFrame
    roi = string of region of interest, ie. "Bladder"

    Returns a tuple (df_base, df_comparison):
    df_base = rows of original_df whose 'roi' equals roi and whose 'R.x' is 0
    df_comparison = rows of comparison_scan whose 'roi' equals roi
    """
    # Exact equality is used for both selections. A regex prefix match
    # (Series.str.match) would also select structures whose name merely
    # starts with `roi` (e.g. "CTV" matching "CTV_Clin") and would treat
    # regex metacharacters in the name as pattern syntax.
    base_mask = (original_df['roi'] == roi) & original_df['R.x'].eq(0)
    comparison_mask = comparison_scan['roi'] == roi
    return original_df[base_mask], comparison_scan[comparison_mask]
def getLRAPS(df, roi):
    """Return the z offsets and the per-column max/min motion values for one ROI.

    Arguments:
    df = full original csv in pandas dataFrame
    roi = string of region of interest, ie. "Bladder"

    Returns a 10-tuple, in this order:
    supz, infz, rx_max, rx_min, lx_max, lx_min, py_max, py_min, ay_max, ay_min
    """
    # Keep only rows that contain no zero in any column; per the dataset
    # convention, zero-valued rows belong to the base week.
    nonzero_rows = df[(df != 0).all(axis=1)]
    base_rows, roi_rows = splitter_base_comparison(nonzero_rows, df, roi)

    z_compare = roi_rows['z-RefZ']
    z_base = base_rows['z-RefZ']
    # comparison max z minus base max z
    supz = float(z_compare.max()) - z_base.max()
    # comparison min z minus base min z
    infz = float(z_compare.min()) - z_base.min()

    # max then min for each parameter column, in the documented output order
    extremes = []
    for column in ('R.x', 'L.x', 'P.y', 'A.y'):
        values = roi_rows[column]
        extremes.append(values.max())
        extremes.append(values.min())
    return tuple([supz, infz] + extremes)
# In[2]:
def getStudyMetadata(filename):
    """Extract patient metadata from a data file name.

    Arguments:
    filename = name of the csv file (not the full path)

    Returns a tuple:
    (patientNum, baseWeek, baseModality, comparisonWeek, comparisonModality, fraction)
    Fields taken from a regex are "" when the pattern is not found.
    fraction is 'intrafraction' when baseWeek equals comparisonWeek,
    otherwise 'interfraction'.
    """
    def _first_group(pattern):
        # first capture group of the first match, or "" when nothing matches
        found = re.search(pattern, filename)
        return found.group(1) if found else ""

    # fixed character positions within the file name
    patient_id = filename[0:7]
    base_week = filename[13:16]

    base_modality = _first_group(r"\=....(.*)...\_")
    comparison_week = _first_group(r"\_vs_(Wk\d)")
    comparison_modality = _first_group(r"\_vs_... (.*)\.")

    kind = 'intrafraction' if base_week == comparison_week else 'interfraction'
    return (patient_id, base_week, base_modality,
            comparison_week, comparison_modality, kind)
# In[3]:
# main function
def calculate_points(dir_list, roi):
    """Compute the motion extremes for every patient csv and append them to output csvs.

    For each folder in dir_list, every file whose name ends with '.csv',
    starts with 'Z' and does not contain 'SUPINF' is read. One row is built
    from the file-name metadata followed by the getLRAPS values of each ROI,
    and appended to '<folder>/output/<fraction><last 3 chars of folder>.csv'.

    Arguments:
    dir_list = list of folder paths to scan
    roi = list of region-of-interest names, ie. ["Bladder", "Rectum"]

    Returns a sorted list of the distinct real paths of the output files written.
    Rows are appended, so running again on the same folders adds further rows.
    """
    output_file_paths = set()
    for folder in dir_list:
        output_dir = os.path.join(folder, 'output')
        # os.listdir order is arbitrary; sort so the row order is reproducible
        for filename in sorted(os.listdir(folder)):
            is_patient_csv = (
                filename.endswith('.csv')
                and filename.startswith('Z')
                and "SUPINF" not in filename
            )
            if not is_patient_csv:
                continue

            df = pd.read_csv(os.path.join(folder, filename))

            # metadata first (used for naming the output file), then ROI values
            output = list(getStudyMetadata(filename))
            for n in roi:
                output.extend(getLRAPS(df, n))

            # separate csv for inter and intrafraction
            fraction = output[5]
            os.makedirs(output_dir, exist_ok=True)
            out_path = os.path.join(folder, 'output/{}{}.csv'.format(fraction, folder[-3:]))
            # newline='' is required by the csv module; without it extra
            # blank lines appear between rows on Windows
            with open(out_path, 'a', newline='') as file_:
                output_file_paths.add(os.path.realpath(file_.name))
                csv.writer(file_, delimiter=',').writerow(output)
            print('Done extract for ', filename, fraction)
    return sorted(output_file_paths)
# In[4]:
# build the full header row and add it to every output file
def write_headers(roi, headers, output_file_paths, params=None):
    """Add a header row to each output csv.

    The full header is `headers` followed by '<roi> <parameter>' for every
    ROI and every parameter name, ROI-major.

    Arguments:
    roi = list of region-of-interest names
    headers = list of the leading column names; this list is not modified
    output_file_paths = paths of the header-less csv files to rewrite in place
    params = optional list of per-ROI parameter names; when omitted the
             module-level `param` list is used

    Each file is read with the full header as its column names and written
    back with pandas' default to_csv settings, which also writes the row
    index as an unnamed first column. The files are expected to have no
    header line yet: an existing header line would be read as a data row.
    """
    if params is None:
        params = param

    # build a new list so the caller's `headers` is not grown on every call
    full_headers = list(headers)
    full_headers.extend(n + ' ' + l for n in roi for l in params)

    for file_path in output_file_paths:
        add_header = pd.read_csv(file_path, names=full_headers, index_col=None)
        add_header.to_csv(file_path)
        print('Done header:' + file_path)
# # Part 2: Specifying folders and variables
# In[5]:
# use os.path.join and os.getcwd to build the full path of every data folder
# init variables: folder_name, dir_list, roi, headers, param
# data folders, relative to the current working directory
folder_name = [
    'Dec20_data/Interfraction/Interfraction 3D 0.8',
    'Dec20_data/Interfraction/Interfraction DIXON 2.0',
    'Dec20_data/Intrafraction 3D vs DIXON HR IP 2.0'
]

# full paths of the data folders
dir_list = [os.path.join(os.getcwd(), name) for name in folder_name]

# regions of interest to extract
roi = ['GTV_T', 'CTV_Clin', 'CTV_SmallVol', 'Bladder', 'Rectum']

# leading (metadata) columns of the output csv
# NOTE(review): 'comparisonModaility' is misspelled, but it is written out as a
# column name, so it is left unchanged here - confirm before renaming.
headers = ['patientNum', 'baseWeek', 'baseModality', 'comparisonWeek', 'comparisonModaility', 'fraction']

# per-ROI value names, in the order getLRAPS returns them
param = ['supz', 'infz', 'rx_max', 'rx_min', 'lx_max', 'lx_min', 'py_max', 'py_min', 'ay_max', 'ay_min']
# # Part 3: Running script
# In[6]:
# execute all functions
# Guarded so that importing this module (e.g. to reuse its functions) does not
# scan the data folders or write any files; running it as a script is unchanged.
if __name__ == "__main__":
    # 1. do all calculations
    output_file_paths = calculate_points(dir_list, roi)
    # 2. write all headers
    write_headers(roi, headers, output_file_paths)