Functions on Objects(对象函数)¶
Typescript¶
Parse and process Excel files in Typescript¶
How do I parse and process Excel files in Typescript?
This code uses the ExcelJS library to parse and process Excel files in Typescript. It reads an Excel file, processes the workbook, validates the header, and unpivots the data to create a new data object.
import { OntologyEditFunction, Timestamp } from "@foundry/functions-api";
import { Uuid } from "@foundry/functions-utils";
import { Objects, Upload, SampleData } from "@foundry/ontology-api";
import { Workbook } from "exceljs";
interface DataRow {
target: string;
unit: string;
property: string;
value: number;
uploaded_at: Timestamp;
upload_key: string;
}
export class ParseExcel {
@OntologyEditFunction()
public async addData(upload: Upload): Promise<void> {
try {
const arrayBuffer = await this.getArrayBufferFromAttachment(upload);
// Create a workbook from the arrayBuffer
const workbook = new Workbook();
await workbook.xlsx.load(arrayBuffer);
const meltedData = this.processWorkbook(workbook, upload.uploadedAt!, upload.uploadKey!);
const version = this.generateVersion(upload.title!, upload.uploadedAt!);
meltedData.forEach(row => {
this.createNewData(row, version);
});
upload.status = 'Success'
// Update Upload Version
upload.version = version;
} catch (error: any) {
upload.status = 'Failed';
upload.errorMessage = error.message;
}
}
/**
* Validate header to make sure it has all necessary headers
*/
private validateHeader(header: string[]): string | null {
const target = 'target';
const yIntercept = 'Y_intercept';
if (header[1] !== target || header[2] !== yIntercept) {
// console.log(header)
return 'The header must start with target and Y_intercept in the first two columns.';
}
const regex = /^[a-zA-Z0-9_]+$/;
for (const columnName of header) {
if (!regex.test(columnName)) {
return `The header contains invalid characters. Only English letters and underscores are allowed. Invalid column name: ${columnName}`;
}
}
return null;
}
private createNewData(row: DataRow, version: string) {
const newData = Objects.create()._sampleData(Uuid.random());
newData.title = `${row.target} ${row.unit} ${row.property} Data`;
newData.value = row.value;
newData.target = row.target;
newData.unit = row.unit;
newData.version = version;
newData.property_ = row.property;
newData.uploadKey = row.upload_key;
newData.propertyKey = this.generatePropertyKey(row.unit, row.property);
return newData;
}
private generatePropertyKey(unit: string, property: string){
return `Sample_Property_Key_${unit}_${property}`
}
private generateVersion(unit: string, uploaded_at: Timestamp): string {
// Format the timestamp as YYYYMMDD_HHMMSS
const formattedDate = uploaded_at.toISOString().slice(0, 10).replace(/-/g, "");
const formattedTime = uploaded_at.toISOString().slice(11, 19).replace(/:/g, "");
// Create the version string using the unit and the formatted timestamp
const version = `${unit}_${formattedDate}_${formattedTime}`;
return version;
}
/**
* Reads ArrayBuffer from the attachment in the given Upload object.
* @param upload The Upload object containing the attachment.
*/
private async getArrayBufferFromAttachment(upload: Upload): Promise<ArrayBuffer> {
const attachmentBlob = await upload.attachment!.readAsync();
return attachmentBlob.arrayBuffer();
}
/**
* Processes the workbook and returns a 2D array in DataFrame format.
* @param workbook The Workbook object to process.
*/
private processWorkbook(workbook: Workbook, uploadedAt: Timestamp, uploadKey: string): DataRow[] {
const allMeltedData: DataRow[] = [];
workbook.eachSheet((sheet) => {
const sheetUnit = sheet.name;
// Process header
let headerProcessed = false;
let sheetHeader: string[] = [];
const sheetData: any[][] = [];
// Process rows of each sheet
sheet.eachRow((row, rowIndex) => {
if (row.values) {
const rowValues = row.values as (string | number)[];
// If rowIndex is 1, handle it as a header
if (rowIndex === 1) {
sheetHeader = rowValues.filter(value => value !== null && typeof value === 'string') as string[];
sheetHeader.unshift('unit');
const validationResult = this.validateHeader(sheetHeader);
if (validationResult) {
throw new Error(validationResult);
}
headerProcessed = true;
} else {
const newRowValues = this.processRow(rowValues, sheetHeader);
newRowValues.unshift(sheetUnit);
sheetData.push(newRowValues);
}
}
});
// If the header was not processed in the rows, add it manually
if (!headerProcessed) {
sheetHeader.unshift('unit');
const validationResult = this.validateHeader(sheetHeader);
if (validationResult) {
throw new Error(validationResult);
}
}
const meltedData = this.unpivotData(sheetData, sheetUnit, uploadedAt, uploadKey, sheetHeader);
allMeltedData.push(...meltedData);
});
return allMeltedData;
}
/**
* Processes a row from the workbook and returns the new row data.
* @param rowValues The values of the row to process.
* @param header The header array.
*/
private processRow(rowValues: (string | number)[], header: (string | number)[]): any[] {
const newRowValues = Array(header.length).fill(0); // Fill with 0
rowValues.forEach((value, index) => {
if (index > 0 && header[index - 1] !== undefined) {
newRowValues[index - 1] = value;
}
});
return newRowValues.map((value, index) => {
if (header[index] !== 'target' && header[index] !== 'unit') {
return typeof value === 'string' ? parseFloat(value) : value;
}
return value;
});
}
/**
* Unpivots the data and returns an array of MeltedDataRow objects.
* @param data The 2D array in DataFrame format to unpivot.
*/
private unpivotData(data: any[][], unit: string, uploadedAt: Timestamp, uploadKey: string, header: string[]): DataRow[] {
const idVars = ["unit", "target"];
const meltedData: DataRow[] = [];
data.forEach(row => {
const idValues = row.slice(0, idVars.length);
row.slice(idVars.length).forEach((value, index) => {
if (value !== 0) {
const newRow: DataRow = {
target: idValues[1],
unit: unit,
property: header[idVars.length + index] as string, // Use header to get the property name
value: value,
uploaded_at: uploadedAt,
upload_key: uploadKey,
};
meltedData.push(newRow);
}
});
});
return meltedData;
}
}
- Date submitted: 2024-05-23
- Tags:
excel,typescript,functions on objects,file upload,dataframe
中文翻译¶
对象函数¶
TypeScript¶
在 TypeScript 中解析和处理 Excel 文件¶
如何在 TypeScript 中解析和处理 Excel 文件?
以下代码使用 ExcelJS 库在 TypeScript 中解析和处理 Excel 文件。它读取 Excel 文件、处理工作簿、验证表头,并对数据进行逆透视(unpivot)以创建新的数据对象。
import { OntologyEditFunction, Timestamp } from "@foundry/functions-api";
import { Uuid } from "@foundry/functions-utils";
import { Objects, Upload, SampleData } from "@foundry/ontology-api";
import { Workbook } from "exceljs";
interface DataRow {
target: string;
unit: string;
property: string;
value: number;
uploaded_at: Timestamp;
upload_key: string;
}
export class ParseExcel {
@OntologyEditFunction()
public async addData(upload: Upload): Promise<void> {
try {
const arrayBuffer = await this.getArrayBufferFromAttachment(upload);
// 从 arrayBuffer 创建工作簿
const workbook = new Workbook();
await workbook.xlsx.load(arrayBuffer);
const meltedData = this.processWorkbook(workbook, upload.uploadedAt!, upload.uploadKey!);
const version = this.generateVersion(upload.title!, upload.uploadedAt!);
meltedData.forEach(row => {
this.createNewData(row, version);
});
upload.status = 'Success'
// 更新上传版本
upload.version = version;
} catch (error: any) {
upload.status = 'Failed';
upload.errorMessage = error.message;
}
}
/**
* 验证表头,确保包含所有必要的列名
*/
private validateHeader(header: string[]): string | null {
const target = 'target';
const yIntercept = 'Y_intercept';
if (header[1] !== target || header[2] !== yIntercept) {
// console.log(header)
return '表头的前两列必须为 target 和 Y_intercept。';
}
const regex = /^[a-zA-Z0-9_]+$/;
for (const columnName of header) {
if (!regex.test(columnName)) {
return `表头包含无效字符。仅允许英文字母和下划线。无效列名:${columnName}`;
}
}
return null;
}
private createNewData(row: DataRow, version: string) {
const newData = Objects.create()._sampleData(Uuid.random());
newData.title = `${row.target} ${row.unit} ${row.property} Data`;
newData.value = row.value;
newData.target = row.target;
newData.unit = row.unit;
newData.version = version;
newData.property_ = row.property;
newData.uploadKey = row.upload_key;
newData.propertyKey = this.generatePropertyKey(row.unit, row.property);
return newData;
}
private generatePropertyKey(unit: string, property: string){
return `Sample_Property_Key_${unit}_${property}`
}
private generateVersion(unit: string, uploaded_at: Timestamp): string {
// 将时间戳格式化为 YYYYMMDD_HHMMSS
const formattedDate = uploaded_at.toISOString().slice(0, 10).replace(/-/g, "");
const formattedTime = uploaded_at.toISOString().slice(11, 19).replace(/:/g, "");
// 使用 unit 和格式化后的时间戳创建版本字符串
const version = `${unit}_${formattedDate}_${formattedTime}`;
return version;
}
/**
* 从给定的 Upload 对象中的附件读取 ArrayBuffer。
* @param upload 包含附件的 Upload 对象。
*/
private async getArrayBufferFromAttachment(upload: Upload): Promise<ArrayBuffer> {
const attachmentBlob = await upload.attachment!.readAsync();
return attachmentBlob.arrayBuffer();
}
/**
* 处理工作簿并以 DataFrame 格式返回二维数组。
* @param workbook 要处理的工作簿对象。
*/
private processWorkbook(workbook: Workbook, uploadedAt: Timestamp, uploadKey: string): DataRow[] {
const allMeltedData: DataRow[] = [];
workbook.eachSheet((sheet) => {
const sheetUnit = sheet.name;
// 处理表头
let headerProcessed = false;
let sheetHeader: string[] = [];
const sheetData: any[][] = [];
// 处理每个工作表的行
sheet.eachRow((row, rowIndex) => {
if (row.values) {
const rowValues = row.values as (string | number)[];
// 如果 rowIndex 为 1,则作为表头处理
if (rowIndex === 1) {
sheetHeader = rowValues.filter(value => value !== null && typeof value === 'string') as string[];
sheetHeader.unshift('unit');
const validationResult = this.validateHeader(sheetHeader);
if (validationResult) {
throw new Error(validationResult);
}
headerProcessed = true;
} else {
const newRowValues = this.processRow(rowValues, sheetHeader);
newRowValues.unshift(sheetUnit);
sheetData.push(newRowValues);
}
}
});
// 如果行中未处理表头,则手动添加
if (!headerProcessed) {
sheetHeader.unshift('unit');
const validationResult = this.validateHeader(sheetHeader);
if (validationResult) {
throw new Error(validationResult);
}
}
const meltedData = this.unpivotData(sheetData, sheetUnit, uploadedAt, uploadKey, sheetHeader);
allMeltedData.push(...meltedData);
});
return allMeltedData;
}
/**
* 处理工作表中的一行并返回新的行数据。
* @param rowValues 要处理的行值。
* @param header 表头数组。
*/
private processRow(rowValues: (string | number)[], header: (string | number)[]): any[] {
const newRowValues = Array(header.length).fill(0); // 用 0 填充
rowValues.forEach((value, index) => {
if (index > 0 && header[index - 1] !== undefined) {
newRowValues[index - 1] = value;
}
});
return newRowValues.map((value, index) => {
if (header[index] !== 'target' && header[index] !== 'unit') {
return typeof value === 'string' ? parseFloat(value) : value;
}
return value;
});
}
/**
* 对数据进行逆透视并返回 MeltedDataRow 对象数组。
* @param data 要进行逆透视的 DataFrame 格式二维数组。
*/
private unpivotData(data: any[][], unit: string, uploadedAt: Timestamp, uploadKey: string, header: string[]): DataRow[] {
const idVars = ["unit", "target"];
const meltedData: DataRow[] = [];
data.forEach(row => {
const idValues = row.slice(0, idVars.length);
row.slice(idVars.length).forEach((value, index) => {
if (value !== 0) {
const newRow: DataRow = {
target: idValues[1],
unit: unit,
property: header[idVars.length + index] as string, // 使用表头获取属性名称
value: value,
uploaded_at: uploadedAt,
upload_key: uploadKey,
};
meltedData.push(newRow);
}
});
});
return meltedData;
}
}
- 提交日期:2024-05-23
- 标签:
excel、typescript、对象函数、文件上传、dataframe