data_parsing_utils.py 2.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374
  1. import math
  2. from typing import List, Tuple, Union
  3. def is_float(element) -> bool:
  4. try:
  5. float(element)
  6. val = float(element)
  7. if math.isnan(val) or math.isinf(val):
  8. raise ValueError
  9. return True
  10. except ValueError:
  11. return False
  12. # decode byte lines in place (to utf-8)
  13. # a status string is returned
  14. def read_data(data: list) -> str:
  15. for x in range(len(data)):
  16. if type(data[x]) == bytes:
  17. try:
  18. data[x] = data[x].decode('utf-8-sig', 'ignore')
  19. except:
  20. return 'Not an utf-8-sig line №: ' + str(x)
  21. return 'data read, but not parsed'
  22. # check whether a line is a comment
  23. # '!' marks a comment line according to the .snp documentation,
  24. # the other prefixes detect comments in various languages
  25. def check_line_comments(line: str) -> Union[str, None]:
  26. if len(line) < 2 or line[0] == '!' or line[0] == '#' or line[
  27. 0] == '%' or line[0] == '/':
  28. return None
  29. else:
  30. # generally we expect these chars as separators
  31. line = line.replace(';', ' ').replace(',', ' ').replace('|', ' ')
  32. if '!' in line:
  33. line = line[:line.find('!')]
  34. return line
  35. # unpack the first few lines of the file to get the number of ports
  36. def count_columns(data: List[str]) -> Tuple[int, str]:
  37. return_status = 'data parsed'
  38. column_count = 0
  39. for x in range(len(data)):
  40. line = check_line_comments(data[x])
  41. if line is None:
  42. continue
  43. line = line.split()
  44. # always at least 3 values for single data point
  45. if len(line) < 3:
  46. return_status = 'Can\'t parse line № ' + \
  47. str(x) + ',\n not enough arguments (less than 3)'
  48. break
  49. column_count = len(line)
  50. break
  51. return (column_count, return_status)
  52. def prepare_snp(data: List[str], number: int) -> Tuple[List[str], str]:
  53. prepared_data = []
  54. return_status = 'data read, but not parsed'
  55. for x in range(len(data)):
  56. line = check_line_comments(data[x])
  57. if line is None:
  58. continue
  59. splitted_line = line.split()
  60. if number * 2 + 1 == len(splitted_line):
  61. prepared_data.append(line)
  62. elif number * 2 == len(splitted_line):
  63. prepared_data[-1] += line
  64. else:
  65. return_status = "Parsing error for .snp format on line №" + str(x)
  66. return prepared_data, return_status