Pages: 1
  Print  
Author Topic: fileio functions (UTF-8 on Windows + file descriptors)  (Read 167 times)
Offline (Male) time-killer-games
Posted on: October 23, 2021, 09:45:27 am

Contributor
Location: Virginia Beach
Joined: Jan 2013
Posts: 1172

View Profile Email
I was hoping you guys could review this code.

This is currently under universal, but can be moved to extensions so that we can do platform checks in the makefile and not the sources.

Code: [Select]
  int file_bin_open(string fname, int mode) {
    #if defined(_WIN32)
    wstring wfname = widen(fname);
    FILE *fp = nullptr;
    switch (mode) {
      case  0: { if (!_wfopen_s(&fp, wfname.c_str(), L"rb, ccs=UTF-8" )) break; return -1; }
      case  1: { if (!_wfopen_s(&fp, wfname.c_str(), L"wb, ccs=UTF-8" )) break; return -1; }
      case  2: { if (!_wfopen_s(&fp, wfname.c_str(), L"w+b, ccs=UTF-8")) break; return -1; }
      case  3: { if (!_wfopen_s(&fp, wfname.c_str(), L"ab, ccs=UTF-8" )) break; return -1; }
      case  4: { if (!_wfopen_s(&fp, wfname.c_str(), L"a+b, ccs=UTF-8")) break; return -1; }
      default: return -1;
    }
    if (fp) { int fd = _dup(_fileno(fp));
    fclose(fp); return fd; }
    #else
    FILE *fp = nullptr;
    switch (mode) {
      case  0: { fp = fopen(fname.c_str(), "rb" ); break; }
      case  1: { fp = fopen(fname.c_str(), "wb" ); break; }
      case  2: { fp = fopen(fname.c_str(), "w+b"); break; }
      case  3: { fp = fopen(fname.c_str(), "ab" ); break; }
      case  4: { fp = fopen(fname.c_str(), "a+b"); break; }
      default: return -1;
    }
    if (fp) { int fd = dup(fileno(fp));
    fclose(fp); return fd; }
    #endif
    return -1;
  }

  int file_bin_rewrite(int fd) {
    #if defined(_WIN32)
    _lseek(fd, 0, SEEK_SET);
    return _chsize(fd, 0);
    #else
    lseek(fd, 0, SEEK_SET);
    return ftruncate(fd, 0);
    #endif
  }
 
  int file_bin_close(int fd) {
    #if defined(_WIN32)
    return _close(fd);
    #else
    return close(fd);
    #endif
  }
 
  long file_bin_size(int fd) {
    #if defined(_WIN32)
    struct _stat info = { 0 };
    int result = _fstat(fd, &info);
    #else
    struct stat info = { 0 };
    int result = fstat(fd, &info);
    #endif
    if (result != -1) {
      return info.st_size;
    }
    return 0;
  }

  long file_bin_position(int fd) {
    #if defined(_WIN32)
    return _lseek(fd, 0, SEEK_CUR);
    #else
    return lseek(fd, 0, SEEK_CUR);
    #endif
  }
 
  long file_bin_seek(int fd, long pos) {
    #if defined(_WIN32)
    return _lseek(fd, pos, SEEK_CUR);
    #else
    return lseek(fd, pos, SEEK_CUR);
    #endif
  }

  int file_bin_read_byte(int fd) {
    int byte = -1;
    #if defined(_WIN32)
    int num = (int)_read(fd, &byte, 1);
    #else
    int num = (int)read(fd, &byte, 1);
    #endif
    if (num == -1) return -1;
    return byte;
  }

  int file_bin_write_byte(int fd, int byte) {
    #if defined(_WIN32)
    return (int)_write(fd, &byte, 1);
    #else
    return (int)write(fd, &byte, 1);
    #endif
  }

  int file_text_open_read(string fname) {
    return file_bin_open(fname, 0);
  }

  int file_text_open_write(string fname) {
    return file_bin_open(fname, 1);
  }

  int file_text_open_append(string fname) {
    return file_bin_open(fname, 3);
  }

  long file_text_write_string(int fd, string str) {
    #if defined(_WIN32)
    wstring wstr = widen(str);
    wchar_t *buffer = new wchar_t[wstr.length()];
    wcscpy_s(buffer, wstr.length(), wstr.c_str());
    str = narrow(buffer);
    long result = _write(fd, str.data(), str.length());
    #else
    char *buffer = new char[str.length()];
    strcpy(buffer, str.c_str());
    long result = write(fd, buffer, str.length());
    #endif
    delete[] buffer;
    return result;
  }

  long file_text_write_real(int fd, double val) {
    string str = std::to_string(val);
    return file_text_write_string(fd, str);
  }

  int file_text_writeln(int fd) {
    return file_bin_write_byte(fd, '\n');
  }

  bool file_text_eof(int fd) {
    return (file_bin_position(fd) >= file_bin_size(fd));
  }

  bool file_text_eoln(int fd) {
    file_bin_seek(fd, -1);
    bool res = ((char)file_bin_read_byte(fd) == '\n');
    return (file_text_eof(fd) || res);
  }

  // Parses a decimal number from the bytes at the current position of `fd`
  // and returns it as a double (strtod over the characters collected).
  //
  // Accepted shape: an optional leading '+' or '-', then digits with at most
  // one '.'; there is no exponent handling. One leading '\n' is skipped.
  // Returns 0 straight away when the first significant byte is not a digit,
  // a sign, or '.'.
  // NOTE(review): the byte that ends the number is read from `fd` and never
  // pushed back — confirm callers expect the terminator to be consumed.
  // NOTE(review): `is_digit` is not defined in this listing; presumably a
  // project helper equivalent to isdigit — verify.
  // NOTE(review): the `byte == -1` tests compare a `char` with -1, which
  // looks like it relies on `char` being signed — confirm for all targets.
  double file_text_read_real(int fd) {
    // `dot` records that a '.' has been accepted; `sign` that the first byte
    // was '+' or '-'.
    bool dot = false, sign = false;
    string str; char byte = (char)file_bin_read_byte(fd);
    // Skip a single newline left in front of the number.
    if (byte == '\n') byte = (char)file_bin_read_byte(fd);
    // Classify the first significant byte.
    if (byte == '.' && !dot) {
      dot = true;
    } else if (!is_digit(byte) && byte != '+' &&
      byte != '-' && byte != '.') {
      // Not the start of a number.
      return 0;
    } else if (byte == '+' || byte == '-') {
      sign = true;
    }
    // A failed read stops collection; what has been gathered is parsed.
    if (byte == -1) goto finish;
    // Append the accepted byte to the text that will be parsed.
    str.resize(str.length() + 1, '\0');
    str[str.length() - 1] = byte;
    if (sign) {
      // After a sign, the next byte must be a digit or '.'; otherwise only
      // the sign is parsed.
      byte = (char)file_bin_read_byte(fd);
      if (byte == '.' && !dot) {
        dot = true;
      } else if (!is_digit(byte) && byte != '.') {
        return strtod(str.c_str(), nullptr);
      }
      if (byte == -1) goto finish;
      str.resize(str.length() + 1, '\0');
      str[str.length() - 1] = byte;
    }
    // Collect the remaining digits (and the first '.', if none seen yet).
    while (byte != '\n' && !(file_bin_position(fd) > file_bin_size(fd))) {
      byte = (char)file_bin_read_byte(fd);
      if (byte == '.' && !dot) {
        dot = true;
      } else if (byte == '.' && dot) {
        // A second '.' ends the number.
        break;
      } else if (!is_digit(byte) && byte != '.') {
        // Any other non-digit byte ends the number.
        break;
      } else if (byte == '\n' || file_bin_position(fd) > file_bin_size(fd)) {
        break;
      }
      if (byte == -1) goto finish;
      str.resize(str.length() + 1, '\0');
      str[str.length() - 1] = byte;
    }
    finish:
    // Convert whatever was collected.
    return strtod(str.c_str(), nullptr);
  }

  string file_text_read_string(int fd) {
    int byte = -1; string str;
    while ((char)byte != '\n' && !file_text_eof(fd)) {
      byte = file_bin_read_byte(fd);
      str.resize(str.length() + 1, '\0');
      str[str.length() - 1] = ((byte == -1) ? 0 : byte);
      if (byte == -1) break;
    }
    if (str[str.length() - 2] != '\r' && str[str.length() - 1] == '\n') {
      file_bin_seek(fd, -1);
    }
    if (str[str.length() - 2] == '\r' && str[str.length() - 1] == '\n') {
      file_bin_seek(fd, -2);
    }
    return str;
  }

  string file_text_readln(int fd) {
    int byte = -1; string str;
    while ((char)byte != '\n' && !file_text_eof(fd)) {
      byte = file_bin_read_byte(fd);
      str.resize(str.length() + 1, '\0');
      str[str.length() - 1] = ((byte == -1) ? 0 : byte);
      if (byte == -1) break;
    }
    return str;
  }

  string file_text_read_all(int fd) {
    string str;
    long sz = file_bin_size(fd);
    char *buffer = new char[sz];
    #if defined(_WIN32)
    long result =  _read(fd, buffer, sz);
    #else
    long result = read(fd, buffer, sz);
    #endif
    if (result == -1) {
      delete[] buffer;
      return "";
    }
    str = buffer ? buffer : "";
    delete[] buffer;
    return str;
  }

  int file_text_open_from_string(string str) {
    int fd[2];
    #if defined(_WIN32)
    if (_pipe(fd, str.length() + 1, O_BINARY) == -1) return -1;
    #else
    if (pipe(fd) < 0) return -1;
    #endif
    if (file_text_write_string(fd[1], str) == -1) {
      file_bin_close(fd[0]);
      file_bin_close(fd[1]);
      return -1;
    }
    file_bin_close(fd[1]);
    return fd[0];
  }
 
  int file_text_close(int fd) {
    return file_bin_close(fd);
  }

This enables UTF-8 support on Windows and makes use of real file descriptors instead of the asset array, both of which are improvements if you ask me. All of these functions may be used interchangeably. That means you can take a file descriptor returned from file_bin_open(), file_text_open_read(), file_text_open_write(), file_text_open_append(), or file_text_open_from_string(), and pass it to any of the other file_bin_* and/or file_text_* functions, and they work together seamlessly.

You can open a file with file_bin_open() for example and write to it with file_text_write_string(), or use the returned value of file_text_open_from_string() stored in a variable and change the seek position while you read from it with file_bin_seek(). Keep in mind that file_text_open_from_string() returns a read-only file descriptor still, as the documentation mentions for GameMaker Studio, and it can't be written or appended to once it has been created.

I also changed the behavior of file_text_read_all() slightly so that it reads only the remaining contents of the file, starting from the current seek position. If you are at the very beginning of the file it will still read the entire contents, so this is unlikely to break existing games; however, it will make them slower with the current implementation. If it does break a game, the author can simply seek the file to the beginning before reading all the contents with that function.

This must be a bug with enigma, however -- while this code works with UTF-8 in a test I wrote in a blank MinGW-compiled project, it still does not actually handle UTF-8 in enigma itself on Windows, using the exact same code. I'm not sure what to make of that, but I know it's not an issue with any of my code. I am a bit concerned that there are some conditions under which file_text_read_real() would break, so please feel free to test it and let me know if you can produce a reproducible case of it breaking, and I'll be happy to write a revised version of that function accordingly. It seems to work quite well for me thus far, though.

The above code is licensed under MIT and is pulled from my GitHub account, however you are free to relicense it however you like when implementing any portion of it modified or not into enigma. I recommend relicensing under GPLv3 (with a linking exception) to match the rest of the software.
« Last Edit: November 18, 2021, 06:20:08 am by time-killer-games » Logged
Pages: 1
  Print