Memproses fail CSV yang besar dengan cekap ialah keperluan biasa dalam banyak aplikasi, daripada analisis data kepada proses ETL (Ekstrak, Transformasi, Muatkan). Dalam artikel ini, saya ingin menanda aras prestasi empat bahasa pengaturcaraan popular—Golang, NodeJS dengan NestJS, PHP dan Python—dalam mengendalikan fail CSV yang besar pada MacBook Pro M1. Saya berhasrat untuk menentukan bahasa yang menyediakan prestasi terbaik untuk tugasan ini.
Perkakasan: MacBook Pro M1, 256GB SSD, 8GB RAM
Perisian:
Saya menggunakan fail CSV sintetik bernama sales_data.csv dengan kira-kira 1 juta baris, setiap satu mengandungi butiran transaksi seperti transaction_id, product_id, quantity, price dan cap waktu.
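Untuk setiap bahasa, skrip melaksanakan tugas berikut: membaca fail CSV, mengira jumlah jualan keseluruhan, dan mengenal pasti produk dengan jumlah jualan tertinggi.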
Berikut ialah skrip yang digunakan untuk setiap bahasa:
sales.go
package main

import (
	"encoding/csv"
	"fmt"
	"io"
	"os"
	"strconv"
	"time"
)

// main times a single pass over the sales CSV. It sums quantity*price over
// every data row, accumulates the same figure per product ID, and prints the
// elapsed time, the grand total and the best-selling product.
//
// Columns are read by position: index 1 is the product ID, index 2 the
// quantity and index 3 the unit price.
func main() {
	start := time.Now()

	file, err := os.Open("../generate-csv/sales_data.csv")
	if err != nil {
		fmt.Println("Error:", err)
		return
	}
	defer file.Close()

	reader := csv.NewReader(file)

	// Skip the header row. An empty file yields io.EOF here; the loop below
	// then terminates immediately and zero totals are reported.
	if _, err := reader.Read(); err != nil && err != io.EOF {
		fmt.Println("Error:", err)
		return
	}

	totalSales := 0.0
	productSales := make(map[string]float64)
	skipped := 0

	for {
		line, err := reader.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			// A real read/parse failure must not be mistaken for end of
			// file: that would silently report partial totals.
			fmt.Println("Error:", err)
			return
		}
		if len(line) < 4 {
			skipped++
			continue
		}

		quantity, err := strconv.Atoi(line[2])
		if err != nil {
			skipped++
			continue
		}
		price, err := strconv.ParseFloat(line[3], 64)
		if err != nil {
			// On a range error ParseFloat returns ±Inf, which would
			// poison every running total, so the row is dropped instead.
			skipped++
			continue
		}

		total := float64(quantity) * price
		totalSales += total
		productSales[line[1]] += total
	}

	// topSales starts at zero, so a product only wins with strictly positive
	// sales. Map iteration order is unspecified, so ties are broken
	// arbitrarily.
	var topProduct string
	var topSales float64
	for product, sales := range productSales {
		if sales > topSales {
			topProduct = product
			topSales = sales
		}
	}

	elapsed := time.Since(start)

	fmt.Printf("Golang Execution time: %s\n", elapsed)
	fmt.Printf("Total Sales: $%.2f\n", totalSales)
	fmt.Printf("Top Product: %s with sales $%.2f\n", topProduct, topSales)
	if skipped > 0 {
		fmt.Printf("Skipped %d malformed rows\n", skipped)
	}
}
csv.service.ts
import { Injectable } from '@nestjs/common';
import * as fs from 'fs';
import * as fastcsv from 'fast-csv';

// Path to the CSV file
const GLOBAL_CSV_PATH = '../generate-csv/sales_data.csv';

/**
 * Streams the sales CSV once and aggregates revenue figures for the
 * benchmark.
 */
@Injectable()
export class CsvService {
  /**
   * Parses the CSV at `GLOBAL_CSV_PATH`, summing `quantity * price` over all
   * rows and per `product_id`, then logs and returns the timing and totals.
   *
   * Rows whose quantity or price do not parse to a finite number are
   * skipped so that a single bad row cannot turn every total into `NaN`.
   *
   * @returns Elapsed wall-clock seconds, the grand total, and the sales of
   *   the best-selling product (`0` when the file has no usable rows).
   * @throws Rejects when the file cannot be read or the CSV parser reports
   *   an error.
   */
  async parseCsv(): Promise<{
    nestExecutionTime: number;
    totalSales: number;
    topProductSales: number;
  }> {
    return new Promise((resolve, reject) => {
      const startTime = process.hrtime();

      let totalSales = 0;
      // A Map avoids collisions with Object.prototype keys such as
      // "constructor" that a plain object used as a dictionary suffers from.
      const productSales = new Map<string, number>();

      const source = fs.createReadStream(GLOBAL_CSV_PATH);
      // pipe() does not forward errors from the source stream, so a missing
      // or unreadable file has to be handled here explicitly.
      source.on('error', (error) => reject(error));

      source
        .pipe(fastcsv.parse({ headers: true, delimiter: ',' }))
        .on('data', (row) => {
          const productID = row.product_id;
          const quantity = parseInt(row.quantity, 10);
          const price = parseFloat(row.price);
          const total = quantity * price;

          if (!Number.isFinite(total)) {
            return;
          }

          totalSales += total;
          productSales.set(productID, (productSales.get(productID) ?? 0) + total);
        })
        .on('end', () => {
          // Explicit scan instead of reduce() without an initial value,
          // which throws a TypeError when there are no products at all.
          let topProduct: string | undefined;
          let topProductSales = 0;
          for (const [productID, sales] of productSales) {
            // ">=" lets the later entry win a tie, as the original
            // comparison did.
            if (topProduct === undefined || sales >= topProductSales) {
              topProduct = productID;
              topProductSales = sales;
            }
          }

          const endTime = process.hrtime(startTime);
          const nestExecutionTime = endTime[0] + endTime[1] / 1e9;

          console.log(`NestJS Execution time: ${nestExecutionTime} seconds`);
          console.log(`Total Sales: $${totalSales}`);
          console.log(
            `Top Product: ${topProduct ?? 'N/A'} with sales $${topProductSales}`,
          );

          resolve({
            nestExecutionTime,
            totalSales,
            topProductSales,
          });
        })
        .on('error', (error) => reject(error));
    });
  }
}
csv.controller.ts
import { Controller, Get } from '@nestjs/common';
import { CsvService } from './csv.service';

/** Summary returned by the CSV benchmark endpoint. */
interface CsvParseResult {
  nestExecutionTime: number;
  totalSales: number;
  topProductSales: number;
}

/** Exposes the CSV benchmark under the `csv` route prefix. */
@Controller('csv')
export class CsvController {
  constructor(private readonly csvService: CsvService) {}

  /**
   * Handles GET requests for `csv/parse` by delegating to
   * `CsvService.parseCsv` and returning its result unchanged.
   *
   * @returns The execution time, total sales and top product sales reported
   *   by the service.
   */
  @Get('parse')
  async parseCsv(): Promise<CsvParseResult> {
    const summary = await this.csvService.parseCsv();
    return summary;
  }
}
sales.php
<?php
// Benchmark script: makes one pass over the sales CSV, summing
// quantity * price over every row and per product ID, then prints the
// elapsed time, the grand total and the best-selling product.
// Columns are read by position: 1 = product ID, 2 = quantity, 3 = price.

$start_time = microtime(true);

$file = fopen("../generate-csv/sales_data.csv", "r");
if ($file === false) {
    // Without this guard fgetcsv() would be handed `false` and fail with a
    // far less helpful error.
    echo "Error: unable to open ../generate-csv/sales_data.csv\n";
    exit(1);
}

$total_sales = 0;
$product_sales = [];

fgetcsv($file); // Skip header

while (($line = fgetcsv($file)) !== false) {
    // fgetcsv() returns [null] for a blank line; skip those and any row
    // that is too short to carry a price column.
    if (!isset($line[3])) {
        continue;
    }

    $product_id = $line[1];
    $quantity = (int)$line[2];
    $price = (float)$line[3];
    $total = $quantity * $price;

    $total_sales += $total;

    if (!isset($product_sales[$product_id])) {
        $product_sales[$product_id] = 0;
    }

    $product_sales[$product_id] += $total;
}

fclose($file);

// Single scan for the maximum instead of sorting the whole array. The strict
// ">" keeps the first product encountered when two products tie.
$top_product = null;
$top_sales = 0;
foreach ($product_sales as $candidate => $sales) {
    if ($top_product === null || $sales > $top_sales) {
        $top_product = $candidate;
        $top_sales = $sales;
    }
}

$end_time = microtime(true);
$execution_time = ($end_time - $start_time);

echo "PHP Execution time: ".$execution_time." seconds\n";
echo "Total Sales: $".$total_sales."\n";
// $top_product stays null when the file contained no usable rows.
echo "Top Product: ".($top_product ?? 'N/A')." with sales $".$top_sales."\n";
import csv
import time

# Input file name config
input_file = '../generate-csv/sales_data.csv'


def parse_csv(file_path):
    """Aggregate sales figures from a CSV file in a single pass.

    Each row must provide ``product_id``, ``quantity`` and ``price`` columns
    (looked up by header name). The revenue of a row is ``quantity * price``.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A dict with the keys ``total_sales``, ``top_product``,
        ``top_product_sales`` and ``execution_time`` (seconds). When the file
        has no data rows, ``top_product`` is ``None`` and
        ``top_product_sales`` is ``0``.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If a required column is missing.
        ValueError: If a quantity or price is not numeric.
    """
    # perf_counter() is monotonic, so the measurement cannot be skewed by
    # system clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    total_sales = 0
    product_sales = {}

    # newline='' is what the csv module documents for file objects, so that
    # newlines embedded in quoted fields are parsed correctly.
    with open(file_path, mode='r', newline='') as file:
        reader = csv.DictReader(file)

        for row in reader:
            product_id = row['product_id']
            quantity = int(row['quantity'])
            price = float(row['price'])
            total = quantity * price
            total_sales += total
            product_sales[product_id] = product_sales.get(product_id, 0) + total

    # default=None keeps a header-only file from raising ValueError in max().
    top_product = max(product_sales, key=product_sales.get, default=None)
    top_product_sales = product_sales.get(top_product, 0)
    execution_time = time.perf_counter() - start_time

    return {
        'total_sales': total_sales,
        'top_product': top_product,
        'top_product_sales': top_product_sales,
        'execution_time': execution_time,
    }


if __name__ == "__main__":
    result = parse_csv(input_file)
    print(f"Python Execution time: {result['execution_time']:.2f} seconds")
    print(f"Total Sales: ${result['total_sales']:.2f}")
    print(f"Top Product: {result['top_product']} with sales ${result['top_product_sales']:.2f}")
Berikut ialah keputusan ujian penanda aras kami:
Tanda aras saya mendedahkan beberapa cerapan menarik:
Masa Pelaksanaan: Golang menunjukkan prestasi terbaik dari segi masa pelaksanaan, diikuti rapat oleh PHP8, manakala NestJS mengambil masa paling lama untuk menyelesaikan tugasan.
Penggunaan Memori: Binaan (build) NestJS menunjukkan penggunaan memori yang cekap, manakala Python menunjukkan penggunaan memori yang lebih tinggi.
Kemudahan Pelaksanaan: Golang menyediakan pelaksanaan yang paling mudah, manakala NestJS memerlukan lebih banyak baris kod dan kerumitan.
Berdasarkan penemuan saya, Golang menawarkan kelajuan prestasi terbaik dan kecekapan memori, menjadikannya pilihan terbaik untuk mengendalikan set data yang besar.
Anda boleh mendapatkan kod penuh di repositori GitHub saya:
csv-parsing-battle.
Di atas ialah kandungan terperinci artikel "Penandaarasan Pemprosesan Fail CSV: Golang lwn NestJS lwn PHP lwn Python". Untuk maklumat lanjut, sila ikuti artikel lain yang berkaitan di laman web China PHP!