|
6 | 6 | import pandas as pd
|
7 | 7 | import numpy as np
|
8 | 8 | import xlrd
|
| 9 | +import openpyxl |
9 | 10 |
|
10 | 11 | from pyscal.utils import capillarypressure
|
11 | 12 | from .wateroil import WaterOil
|
@@ -597,43 +598,53 @@ def load_relperm_df(inputfile, sheet_name=None):
|
597 | 598 |
|
598 | 599 | Returns:
|
599 | 600 | pd.DataFrame. To be handed over to pyscal list factory methods.
|
| 601 | + Empty dataframe in case of errors (messages will be logged). |
600 | 602 | """
|
601 | 603 | if isinstance(inputfile, (str, Path)) and Path(inputfile).is_file():
|
602 |
| - if str(inputfile).lower().endswith("csv") and sheet_name is not None: |
| 604 | + tabular_file_format = infer_tabular_file_format(inputfile) |
| 605 | + if not tabular_file_format: |
| 606 | + # Error message emitted by infer_tabular_file_format() |
| 607 | + return pd.DataFrame() |
| 608 | + |
| 609 | + if tabular_file_format == "csv" and sheet_name is not None: |
603 | 610 | logger.warning(
|
604 | 611 | "Sheet name only relevant for XLSX files, ignoring %s", sheet_name
|
605 | 612 | )
|
606 |
| - try: |
607 |
| - if sheet_name: |
608 |
| - input_df = pd.read_excel(inputfile, sheet_name=sheet_name) |
609 |
| - logger.info("Parsed XLSX file %s, sheet %s", inputfile, sheet_name) |
610 |
| - else: |
611 |
| - input_df = pd.read_excel(inputfile) |
612 |
| - logger.info("Parsed XLSX file %s", inputfile) |
613 |
| - except xlrd.XLRDError as xlserror: |
614 |
| - if inputfile.lower().endswith("xlsx") or inputfile.lower().endswith( |
615 |
| - "xls" |
616 |
| - ): |
617 |
| - logger.error(xlserror) |
| 613 | + excel_engines = {"xls": "xlrd", "xlsx": "openpyxl"} |
| 614 | + if sheet_name: |
618 | 615 | try:
|
619 |
| - input_df = pd.read_csv(inputfile, skipinitialspace=True) |
620 |
| - logger.info("Parsed CSV file %s", inputfile) |
621 |
| - except pd.errors.ParserError as csverror: |
622 |
| - logger.error("Could not parse %s as XLSX or CSV", inputfile) |
623 |
| - logger.error("Error message from csv-parser: %s", str(csverror)) |
624 |
| - input_df = pd.DataFrame() |
625 |
| - except ValueError: |
626 |
| - # We end here when we use csv reader on xls files, that |
627 |
| - # means that xls parsing failed in the first place. Other |
628 |
| - # error messages have been, and will be printed. |
629 |
| - input_df = pd.DataFrame() |
| 616 | + input_df = pd.read_excel( |
| 617 | + inputfile, |
| 618 | + sheet_name=sheet_name, |
| 619 | + engine=excel_engines[tabular_file_format], |
| 620 | + ) |
| 621 | + logger.info( |
| 622 | + "Parsed %s file %s, sheet %s", |
| 623 | + tabular_file_format.upper(), |
| 624 | + inputfile, |
| 625 | + sheet_name, |
| 626 | + ) |
| 627 | + except KeyError as error: |
| 628 | + logger.error("Non-existing sheet-name %s provided?", sheet_name) |
| 629 | + logger.error(str(error)) |
| 630 | + return pd.DataFrame() |
| 631 | + elif tabular_file_format.startswith("xls"): |
| 632 | + input_df = pd.read_excel( |
| 633 | + inputfile, engine=excel_engines[tabular_file_format] |
| 634 | + ) |
| 635 | + logger.info("Parsed %s file %s", tabular_file_format.upper(), inputfile) |
| 636 | + else: |
| 637 | + input_df = pd.read_csv(inputfile, skipinitialspace=True) |
| 638 | + logger.info("Parsed CSV file %s", inputfile) |
| 639 | + |
630 | 640 | elif isinstance(inputfile, pd.DataFrame):
|
631 | 641 | input_df = inputfile
|
632 | 642 | else:
|
633 | 643 | if isinstance(inputfile, str) and not Path(inputfile).is_file():
|
634 | 644 | raise IOError("File not found " + str(inputfile))
|
635 | 645 | raise ValueError("Unsupported argument " + str(inputfile))
|
636 | 646 | assert isinstance(input_df, pd.DataFrame)
|
| 647 | + |
637 | 648 | if input_df.empty:
|
638 | 649 | logger.error("Relperm input dataframe is empty!")
|
639 | 650 |
|
@@ -1044,6 +1055,46 @@ def filter_nan_from_dict(params):
|
1044 | 1055 | return cleaned_params
|
1045 | 1056 |
|
1046 | 1057 |
|
def infer_tabular_file_format(filename):
    """Determine the file format of a file containing tabular data.

    Distinguishes between csv, xls and xlsx by attempting to parse the file
    with each reader in turn (openpyxl, then xlrd, then the pandas CSV
    parser). The file extension is not consulted by this function itself.

    Args:
        filename (str): Path to file, possibly a pathlib Path object.

    Returns:
        str: One of "csv", "xlsx" or "xls". Empty string if the format could
        not be determined; an error message is logged in that case.
    """
    # Imported locally so that this function is self-contained with respect
    # to the standard library.
    import zipfile

    try:
        pd.read_excel(filename, engine="openpyxl")
        return "xlsx"
    except (openpyxl.utils.exceptions.InvalidFileException, zipfile.BadZipFile):
        # We get here for both CSV and XLS files. Which of the two exceptions
        # is raised depends on whether openpyxl receives a file name (it then
        # rejects the extension) or an already opened handle (it then fails
        # to open the content as a zip archive).
        pass
    try:
        pd.read_excel(filename, engine="xlrd")
        return "xls"
    except xlrd.biffh.XLRDError:
        # We get here for both CSV and XLSX files.
        pass
    try:
        dframe = pd.read_csv(filename)
        if not dframe.empty:
            return "csv"
        # A file that parses to an empty frame is deliberately not reported
        # as CSV; fall through to the generic error below.
    except UnicodeDecodeError:
        # Binary content (xls and xlsx files that the Excel readers rejected)
        pass
    except pd.errors.EmptyDataError:
        # Nothing to parse at all (e.g. a zero-byte file)
        pass
    except pd.errors.ParserError as csverror:
        # Some text file that is not CSV
        logger.error("Message from CSV parser: %s", str(csverror))

    logger.error(
        "Impossible to infer file format for %s, not CSV/XLS/XLSX", str(filename)
    )
    return ""
| 1097 | + |
1047 | 1098 | def check_deprecated(params):
|
1048 | 1099 | """Check for deprecated parameter names
|
1049 | 1100 |
|
|
0 commit comments