Pages: 1
  Print  
Author Topic: fileio functions (UTF-8 on Windows + file descriptors)  (Read 707 times)
Offline (Male) time-killer-games
Posted on: October 23, 2021, 09:45:27 am

Contributor
Location: Virginia Beach
Joined: Jan 2013
Posts: 1177

View Profile Email
I was hoping you guys could review this code.

This is currently under universal, but can be moved to extensions so that we can do platform checks in the makefile and not the sources.

Code: [Select]
  string get_filedescriptor_pathname(int fd) {
    string path;
    #if defined(_WIN32)
    DWORD length; HANDLE file = (HANDLE)_get_osfhandle(fd);
    if ((length = GetFinalPathNameByHandleW(file, nullptr, 0, VOLUME_NAME_DOS))) {
      wstring wpath; wpath.resize(length, '\0'); wchar_t *buffer = wpath.data();
      if ((length = GetFinalPathNameByHandleW(file, buffer, length, VOLUME_NAME_DOS))) {
        path = narrow(wpath); size_t pos = 0; string substr = "\\\\?\\";
        if ((pos = path.find(substr, pos)) != string::npos) {
          path.replace(pos, substr.length(), "");
        }
      }
    }
    #elif (defined(__APPLE__) && defined(__MACH__)) || defined(__DragonFly__)
    char buffer[PATH_MAX];
    if (fcntl(fd, F_GETPATH, buffer) != -1) {
      path = buffer;
    }
    #elif defined(__linux__)
    char *buffer = realpath(("/proc/self/fd/" + std::to_string(fd)).c_str(), nullptr);
    path = buffer ? buffer : "";
    free(buffer);
    #elif defined(__FreeBSD__)
    size_t length;
    int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_FILEDESC, getpid() };
    if (sysctl(mib, 4, nullptr, &length, nullptr, 0) == 0) {
      path.resize(length * 2, '\0'); char *buffer = path.data();
      if (sysctl(mib, 4, buffer, &length, nullptr, 0) == 0) {
        for (char *p = buffer; p < buffer + length;) {
          struct kinfo_file *kif = (struct kinfo_file *)p;
          if (kif->kf_fd == fd) {
            path = kif->kf_path;
          }
          p += kif->kf_structsize;
        }
      }
    }
    #endif
    return path;
  }

  int file_bin_open(string fname, int mode) {
    #if defined(_WIN32)
    wstring wfname = widen(fname);
    FILE *fp = nullptr;
    switch (mode) {
      case  0: { if (!_wfopen_s(&fp, wfname.c_str(), L"rb, ccs=UTF-8" )) break; return -1; }
      case  1: { if (!_wfopen_s(&fp, wfname.c_str(), L"wb, ccs=UTF-8" )) break; return -1; }
      case  2: { if (!_wfopen_s(&fp, wfname.c_str(), L"w+b, ccs=UTF-8")) break; return -1; }
      case  3: { if (!_wfopen_s(&fp, wfname.c_str(), L"ab, ccs=UTF-8" )) break; return -1; }
      case  4: { if (!_wfopen_s(&fp, wfname.c_str(), L"a+b, ccs=UTF-8")) break; return -1; }
      default: return -1;
    }
    if (fp) { int fd = _dup(_fileno(fp));
    fclose(fp); return fd; }
    #else
    FILE *fp = nullptr;
    switch (mode) {
      case  0: { fp = fopen(fname.c_str(), "rb" ); break; }
      case  1: { fp = fopen(fname.c_str(), "wb" ); break; }
      case  2: { fp = fopen(fname.c_str(), "w+b"); break; }
      case  3: { fp = fopen(fname.c_str(), "ab" ); break; }
      case  4: { fp = fopen(fname.c_str(), "a+b"); break; }
      default: return -1;
    }
    if (fp) { int fd = dup(fileno(fp));
    fclose(fp); return fd; }
    #endif
    return -1;
  }

  int file_bin_rewrite(int fd) {
    #if defined(_WIN32)
    _lseek(fd, 0, SEEK_SET);
    return _chsize(fd, 0);
    #else
    lseek(fd, 0, SEEK_SET);
    return ftruncate(fd, 0);
    #endif
  }
 
  int file_bin_close(int fd) {
    #if defined(_WIN32)
    return _close(fd);
    #else
    return close(fd);
    #endif
  }
 
  long file_bin_size(int fd) {
    #if defined(_WIN32)
    struct _stat info = { 0 };
    int result = _fstat(fd, &info);
    #else
    struct stat info = { 0 };
    int result = fstat(fd, &info);
    #endif
    if (result != -1) {
      return info.st_size;
    }
    return 0;
  }

  long file_bin_position(int fd) {
    #if defined(_WIN32)
    return _lseek(fd, 0, SEEK_CUR);
    #else
    return lseek(fd, 0, SEEK_CUR);
    #endif
  }
 
  long file_bin_seek(int fd, long pos) {
    #if defined(_WIN32)
    return _lseek(fd, pos, SEEK_CUR);
    #else
    return lseek(fd, pos, SEEK_CUR);
    #endif
  }

  int file_bin_read_byte(int fd) {
    int byte = 0;
    #if defined(_WIN32)
    int num = (int)_read(fd, &byte, 1);
    #else
    int num = (int)read(fd, &byte, 1);
    #endif
    if (num == -1) return 0;
    return byte;
  }

  int file_bin_write_byte(int fd, int byte) {
    #if defined(_WIN32)
    return (int)_write(fd, &byte, 1);
    #else
    return (int)write(fd, &byte, 1);
    #endif
  }

  int file_text_open_read(string fname) {
    return file_bin_open(fname, 0);
  }

  int file_text_open_write(string fname) {
    return file_bin_open(fname, 1);
  }

  int file_text_open_append(string fname) {
    return file_bin_open(fname, 3);
  }

  long file_text_write_string(int fd, string str) {
    char *buffer = str.data();
    #if defined(_WIN32)
    long result = _write(fd, buffer, (unsigned)str.length());
    #else
    long result = write(fd, buffer, (unsigned)str.length());
    #endif
    return result;
  }

  long file_text_write_real(int fd, double val) {
    string str = std::to_string(val);
    return file_text_write_string(fd, str);
  }

  int file_text_writeln(int fd) {
    return file_bin_write_byte(fd, '\n');
  }

  // File-local scratch counter. Any function in this file that uses it resets
  // it to 0 before returning, so it carries no state from one call to the next.
  static unsigned cnt = 0;
  bool file_text_eof(int fd) {
    bool res1 = ((char)file_bin_read_byte(fd) == '\0');
    bool res2 = (file_bin_position(fd) > file_bin_size(fd));
    while (res2 && cnt < 2) {
    file_bin_seek(fd, -1); cnt++; }
    if (!res2) file_bin_seek(fd, -1);
    cnt = 0; return (res1 || res2);
  }

  bool file_text_eoln(int fd) {
    bool res1 = ((char)file_bin_read_byte(fd) == '\n');
    bool res2 = file_text_eof(fd);
    while (res2 && cnt < 2) {
    file_bin_seek(fd, -1); cnt++; }
    if (!res2) file_bin_seek(fd, -1);
    cnt = 0; return (res1 || res2);
  }

  string file_text_read_string(int fd) {
    int byte = file_bin_read_byte(fd); string str;
    str.push_back((char)byte);
    while ((char)byte != '\n') {
      byte = file_bin_read_byte(fd);
      str.push_back((char)byte);
      if (byte == 0) break;
    }
    if (str.length() >= 2) {
      if (str[str.length() - 2] != '\r' && str[str.length() - 1] == '\n') {
        file_bin_seek(fd, -1);
        str = str.substr(0, str.length() - 1);
      }
      if (str[str.length() - 2] == '\r' && str[str.length() - 1] == '\n') {
        file_bin_seek(fd, -2);
        str = str.substr(0, str.length() - 2);
      }
    } else if (str.length() == 1) {
      if (str[str.length() - 1] == '\n') {
        file_bin_seek(fd, -1);
        str = str.substr(0, str.length() - 1);
      }
    }
    return str;
  }

  static bool is_digit(char byte) {
    return (byte == '0' || byte == '1' || byte == '2' || byte == '3' || byte == '4' ||
      byte == '5' || byte == '6' || byte == '7' || byte == '8' || byte == '9');
  }

  double file_text_read_real(int fd) {
    bool dot = false, sign = false;
    string str; char byte = (char)file_bin_read_byte(fd);
    while (byte == '\r' || byte == '\n') byte = (char)file_bin_read_byte(fd);
    if (byte == '.' && !dot) {
      dot = true;
    } else if (!is_digit(byte) && byte != '+' &&
      byte != '-' && byte != '.') {
      return 0;
    } else if (byte == '+' || byte == '-') {
      sign = true;
    }
    if (byte == 0) goto finish;
    str.push_back(byte);
    if (sign) {
      byte = (char)file_bin_read_byte(fd);
      if (byte == '.' && !dot) {
        dot = true;
      } else if (!is_digit(byte) && byte != '.') {
        return strtod(str.c_str(), nullptr);
      }
      if (byte == 0) goto finish;
      str.push_back(byte);
    }
    while (byte != '\n' && !(file_bin_position(fd) > file_bin_size(fd))) {
      byte = (char)file_bin_read_byte(fd);
      if (byte == '.' && !dot) {
        dot = true;
      } else if (byte == '.' && dot) {
        break;
      } else if (!is_digit(byte) && byte != '.') {
        break;
      } else if (byte == '\n' || file_bin_position(fd) > file_bin_size(fd)) {
        break;
      }
      if (byte == 0) goto finish;
      str.push_back(byte);
    }
    finish:
    return strtod(str.c_str(), nullptr);
  }

  string file_text_readln(int fd) {
    int byte = file_bin_read_byte(fd); string str;
    str.push_back((char)byte);
    while ((char)byte != '\n') {
      byte = file_bin_read_byte(fd);
      str.push_back((char)byte);
      if (byte == 0) break;
    }
    return str;
  }

  string file_text_read_all(int fd) {
    string str;
    long sz = file_bin_size(fd);
    char *buffer = new char[sz];
    #if defined(_WIN32)
    long result = _read(fd, buffer, sz);
    #else
    long result = read(fd, buffer, sz);
    #endif
    if (result == -1) {
      delete[] buffer;
      return "";
    }
    str = buffer ? buffer : "";
    delete[] buffer;
    return str;
  }

  int file_text_open_from_string(string str) {
    string fname = get_temp_directory() + "temp.XXXXXX";
    #if defined(_WIN32)
    int fd = -1; wstring wfname = widen(fname);
    wchar_t *buffer = wfname.data(); if (_wmktemp_s(buffer, wfname.length() + 1)) return -1;
    if (_wsopen_s(&fd, buffer, _O_CREAT | _O_RDWR | _O_WTEXT, _SH_DENYNO, _S_IREAD | _S_IWRITE)) {
      return -1;
    }
    #else
    char *buffer = fname.data();
    int fd = mkstemp(buffer);
    #endif
    if (fd == -1) return -1;
    file_text_write_string(fd, str);
    #if defined(_WIN32)
    _lseek(fd, 0, SEEK_SET);
    #else
    lseek(fd, 0, SEEK_SET);
    #endif
    return fd;
  }
 
  int file_text_close(int fd) {
    return file_bin_close(fd);
  }

This enables UTF-8 support on Windows as well as makes use of real file descriptors instead of utilizing the asset array, both of which are improvements if you ask me. All of these functions may be used interchangeably. So that means you can return a file descriptor from file_bin_open(), file_text_open_read(), file_text_open_write(), file_text_open_append(), or file_text_open_from_string(), and pass that return value to any of the other file_bin_* and/or file_text_* functions and they work together seamlessly.

You can open a file with file_bin_open() for example and write to it with file_text_write_string(), or use the returned value of file_text_open_from_string() stored in a variable and change the seek position while you read from it with file_bin_seek(). The file created by GameMaker's implementation of file_text_open_from_string() is read-only, which I found to be a bit silly, and GameMaker has no means to retrieve the filename of this temp file, and it gets deleted automatically when the file is closed with file_text_close(). I changed it so that you have both read and write permissions to the file upon creation, and the file doesn't get deleted automatically when closed with my implementation, to give the user more control over the lifetime of the file. You may get the absolute path filename from the returned file descriptor using the get_filedescriptor_pathname() function, which will allow you to delete that file from your temporary directory.

I also changed the behavior of file_text_read_all() slightly so that it really just reads the remaining contents of the file, starting from the current seek position. If you are at the very beginning of the file it will still read the entire contents, so this is unlikely to break existing games. If it does break a game, they can just seek the file to the beginning before reading all the contents with that function.

The above code is licensed under MIT and is pulled from my GitHub account, however you are free to relicense it however you like when implementing any portion of it modified or not into enigma. I recommend relicensing under GPLv3 (with a linking exception) to match the rest of the software. The full code may be viewed here: https://github.com/time-killer-games/libfilesystem/blob/master/filesystem.cpp
« Last Edit: December 15, 2021, 02:18:29 am by time-killer-games » Logged
Pages: 1
  Print