-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparser.php
104 lines (83 loc) · 3.2 KB
/
parser.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
<?php
require_once 'CSVReader.php'; // Include the CSVReader class
/**
 * Plain data holder for one product record.
 *
 * The declared properties are the fields this script knows by name. The
 * constructor copies every key of the incoming record onto the object, so a
 * record whose keys differ from the declared names ends up with additional,
 * undeclared properties. Creating undeclared properties is deprecated from
 * PHP 8.2 unless the class opts in, hence the attribute below (older PHP
 * versions parse the attribute line as an ordinary "#" comment).
 */
#[\AllowDynamicProperties]
class Product
{
    /** @var mixed Value stored under the "make" key; null when the record has no such key. */
    public $make;

    /** @var mixed Value stored under the "model" key; null when the record has no such key. */
    public $model;

    /** @var mixed Value stored under the "condition" key; null when the record has no such key. */
    public $condition;

    /** @var mixed Value stored under the "grade" key; null when the record has no such key. */
    public $grade;

    /** @var mixed Value stored under the "capacity" key; null when the record has no such key. */
    public $capacity;

    /** @var mixed Value stored under the "colour" key; null when the record has no such key. */
    public $colour;

    /** @var mixed Value stored under the "network" key; null when the record has no such key. */
    public $network;

    /**
     * Copies every key/value pair of the record onto this object.
     *
     * @param array $data Map of property name => value. Keys are used verbatim
     *                    as property names; keys that match none of the
     *                    declared properties are still stored.
     */
    public function __construct($data)
    {
        foreach ($data as $property => $value) {
            $this->$property = $value;
        }
    }
}
/**
 * Converts raw records into Product objects and tallies identical records.
 */
class DataProcessor
{
    /**
     * Builds one Product per record, prints it, and counts duplicate records.
     *
     * Two records are considered the same combination when all of their
     * values, taken in order, are equal as strings.
     *
     * @param array $data List of records; each record is an array of field values.
     *
     * @return array Two-element list:
     *               [0] the Product objects, in input order;
     *               [1] map of combination key => the first record seen for
     *                   that combination with an added 'count' entry holding
     *                   the number of occurrences.
     */
    public function processCSVData($data)
    {
        $products = [];
        $uniqueCombinations = [];

        foreach ($data as $record) {
            // One instance serves both the result list and the printout below.
            $product = new Product($record);
            $products[] = $product;

            $combinationKey = $this->buildCombinationKey($record);

            // Count unique combinations
            if (isset($uniqueCombinations[$combinationKey])) {
                $uniqueCombinations[$combinationKey]['count']++;
            } else {
                $uniqueCombinations[$combinationKey] = $record;
                $uniqueCombinations[$combinationKey]['count'] = 1;
            }

            // Display product object representation
            print_r($product);
        }

        return [$products, $uniqueCombinations];
    }

    /**
     * Joins a record's values into a key that is unique per value sequence.
     *
     * Values are joined with "-". Any "-" or "\" inside a value is
     * backslash-escaped first, so ['x-y', 'z'] and ['x', 'y-z'] yield
     * different keys instead of both collapsing to "x-y-z". Values that
     * contain neither character are joined unchanged.
     *
     * @param array $record Field values of one record.
     *
     * @return string The combination key.
     */
    private function buildCombinationKey($record)
    {
        $escapedValues = array_map(
            static function ($value) {
                return strtr((string) $value, ['\\' => '\\\\', '-' => '\\-']);
            },
            array_values($record)
        );

        return implode('-', $escapedValues);
    }
}
// Command-line entry point.
//
// Usage: php parser.php --file <filename.csv> --unique-combinations <output.csv>
//
// Reads the input file, prints every product, and writes each distinct
// combination of field values together with its number of occurrences to the
// output file. Errors are reported on STDERR with a non-zero exit status.
try {
    // Five entries are needed: script name, --file, <input>, --unique-combinations, <output>.
    if ($argc < 5 || $argv[1] !== '--file' || $argv[3] !== '--unique-combinations') {
        fwrite(STDERR, "Usage: php parser.php --file <filename.csv> --unique-combinations <output.csv>\n");
        exit(1);
    }

    $csvFileName = $argv[2];
    $outputFileName = $argv[4];

    // A ".tsv" extension (any letter case) selects tab as the delimiter; anything else uses a comma.
    $fileExtension = strtolower(pathinfo($csvFileName, PATHINFO_EXTENSION));
    $delimiter = $fileExtension === 'tsv' ? "\t" : ",";

    $csvReader = new CSVReader($csvFileName, $delimiter);
    $data = $csvReader->readCSV();

    // Records are handed to the processor in fixed-size batches.
    $batchSize = 100;
    $dataProcessor = new DataProcessor();
    $uniqueCombinations = [];

    foreach (array_chunk($data, $batchSize) as $batchData) {
        list(, $batchCombinations) = $dataProcessor->processCSVData($batchData);

        // Merge this batch's tallies into the overall tally so that a
        // combination occurring in several batches is reported once, with
        // the summed count.
        foreach ($batchCombinations as $combinationKey => $combination) {
            if (isset($uniqueCombinations[$combinationKey])) {
                $uniqueCombinations[$combinationKey]['count'] += $combination['count'];
            } else {
                $uniqueCombinations[$combinationKey] = $combination;
            }
        }
    }

    // Truncate instead of append: a re-run must not stack a second header
    // and a second set of rows onto a previous run's output.
    $outputFile = fopen($outputFileName, 'w');
    if ($outputFile === false) {
        fwrite(STDERR, "Error: Unable to open the output file '$outputFileName' for writing.\n");
        exit(1);
    }

    if ($uniqueCombinations !== []) {
        // The header is taken from the keys of the first row that will be
        // written (the record's own keys plus 'count'), which keeps the
        // header columns aligned with the row values.
        fputcsv($outputFile, array_keys(reset($uniqueCombinations)));

        foreach ($uniqueCombinations as $combination) {
            fputcsv($outputFile, $combination);
        }
    }

    fclose($outputFile);

    echo "Unique combinations written to '$outputFileName'\n";
} catch (Exception $e) {
    fwrite(STDERR, $e->getMessage() . PHP_EOL);
    exit(1);
}