You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and dots ('.'), can be up to 35 characters long. Letters must be lowercase.
		
		
		
		
		
			
		
			
				
					
					
						
							951 lines
						
					
					
						
							29 KiB
						
					
					
				
			
		
		
	
	
							951 lines
						
					
					
						
							29 KiB
						
					
					
				// This file renders vertex buffers, converts raw meshes | 
						|
// to GL meshes, and manages threads that do the raw-mesh | 
						|
// building (found in cave_mesher.c) | 
						|
 | 
						|
 | 
						|
#include "stb_voxel_render.h" | 
						|
 | 
						|
#define STB_GLEXT_DECLARE "glext_list.h" | 
						|
#include "stb_gl.h" | 
						|
#include "stb_image.h" | 
						|
#include "stb_glprog.h" | 
						|
 | 
						|
#include "caveview.h" | 
						|
#include "cave_parse.h" | 
						|
#include "stb.h" | 
						|
#include "sdl.h" | 
						|
#include "sdl_thread.h" | 
						|
#include <math.h> | 
						|
#include <assert.h> | 
						|
 | 
						|
//#define STBVOX_CONFIG_TEX1_EDGE_CLAMP | 
						|
 | 
						|
 | 
						|
// currently no dynamic way to set mesh cache size or view distance | 
						|
//#define SHORTVIEW | 
						|
 | 
						|
 | 
						|
stbvox_mesh_maker g_mesh_maker; | 
						|
 | 
						|
GLuint main_prog; | 
						|
GLint uniform_locations[64]; | 
						|
 | 
						|
//#define MAX_QUADS_PER_DRAW        (65536 / 4) // assuming 16-bit indices, 4 verts per quad | 
						|
//#define FIXED_INDEX_BUFFER_SIZE   (MAX_QUADS_PER_DRAW * 6 * 2)  // 16*1024 * 12 == ~192KB | 
						|
 | 
						|
// while uploading texture data, this holds our each texture | 
						|
#define TEX_SIZE  64 | 
						|
uint32 texture[TEX_SIZE][TEX_SIZE]; | 
						|
 | 
						|
GLuint voxel_tex[2]; | 
						|
 | 
						|
// chunk state | 
						|
enum | 
						|
{ | 
						|
   STATE_invalid, | 
						|
   STATE_needed, | 
						|
   STATE_requested, | 
						|
   STATE_abandoned, | 
						|
   STATE_valid, | 
						|
}; | 
						|
 | 
						|
// mesh is 32x32x255 ... this is hardcoded in that | 
						|
// a mesh covers 2x2 minecraft chunks, no #defines for it | 
						|
typedef struct | 
						|
{ | 
						|
   int state; | 
						|
   int chunk_x, chunk_y; | 
						|
   int num_quads; | 
						|
   float priority; | 
						|
   int vbuf_size, fbuf_size; | 
						|
 | 
						|
   float transform[3][3]; | 
						|
   float bounds[2][3]; | 
						|
 | 
						|
   GLuint vbuf;// vbuf_tex; | 
						|
   GLuint fbuf, fbuf_tex; | 
						|
 | 
						|
} chunk_mesh; | 
						|
 | 
						|
void scale_texture(unsigned char *src, int x, int y, int w, int h) | 
						|
{ | 
						|
   int i,j,k; | 
						|
   assert(w == 256 && h == 256); | 
						|
   for (j=0; j < TEX_SIZE; ++j) { | 
						|
      for (i=0; i < TEX_SIZE; ++i) { | 
						|
         uint32 val=0; | 
						|
         for (k=0; k < 4; ++k) { | 
						|
            val >>= 8; | 
						|
            val += src[ 4*(x+(i>>2)) + 4*w*(y+(j>>2)) + k]<<24; | 
						|
         } | 
						|
         texture[j][i] = val; | 
						|
      } | 
						|
   } | 
						|
} | 
						|
 | 
						|
void build_base_texture(int n) | 
						|
{ | 
						|
   int x,y; | 
						|
   uint32 color = stb_rand() | 0x808080; | 
						|
   for (y=0; y<TEX_SIZE; ++y) | 
						|
      for (x=0; x<TEX_SIZE; ++x) { | 
						|
         texture[y][x] = (color + (stb_rand()&0x1f1f1f))|0xff000000; | 
						|
      } | 
						|
} | 
						|
 | 
						|
void build_overlay_texture(int n) | 
						|
{ | 
						|
   int x,y; | 
						|
   uint32 color = stb_rand(); | 
						|
   if (color & 16) | 
						|
      color = 0xff000000; | 
						|
   else | 
						|
      color = 0xffffffff; | 
						|
   for (y=0; y<TEX_SIZE; ++y) | 
						|
      for (x=0; x<TEX_SIZE; ++x) { | 
						|
         texture[y][x] = 0; | 
						|
      } | 
						|
 | 
						|
   for (y=0; y < TEX_SIZE/8; ++y) { | 
						|
      for (x=0; x < TEX_SIZE; ++x) { | 
						|
         texture[y][x] = color; | 
						|
         texture[TEX_SIZE-1-y][x] = color; | 
						|
         texture[x][y] = color; | 
						|
         texture[x][TEX_SIZE-1-y] = color; | 
						|
      } | 
						|
   } | 
						|
} | 
						|
 | 
						|
// view radius of about 1024 = 2048 columns / 32 columns-per-mesh = 2^11 / 2^5 = 64x64 | 
						|
// so we need bigger than 64x64 so we can precache, which means we have to be | 
						|
// non-power-of-two, or we have to be pretty huge | 
						|
#define CACHED_MESH_NUM_X   128 | 
						|
#define CACHED_MESH_NUM_Y   128 | 
						|
 | 
						|
 | 
						|
chunk_mesh cached_chunk_mesh[CACHED_MESH_NUM_Y][CACHED_MESH_NUM_X]; | 
						|
 | 
						|
void free_chunk(int slot_x, int slot_y) | 
						|
{ | 
						|
   chunk_mesh *cm = &cached_chunk_mesh[slot_y][slot_x]; | 
						|
   if (cm->state == STATE_valid) { | 
						|
      glDeleteTextures(1, &cm->fbuf_tex); | 
						|
      glDeleteBuffersARB(1, &cm->vbuf); | 
						|
      glDeleteBuffersARB(1, &cm->fbuf); | 
						|
      cached_chunk_mesh[slot_y][slot_x].state = STATE_invalid; | 
						|
   } | 
						|
} | 
						|
 | 
						|
void upload_mesh(chunk_mesh *cm, uint8 *build_buffer, uint8 *face_buffer) | 
						|
{ | 
						|
   glGenBuffersARB(1, &cm->vbuf); | 
						|
   glBindBufferARB(GL_ARRAY_BUFFER_ARB, cm->vbuf); | 
						|
   glBufferDataARB(GL_ARRAY_BUFFER_ARB, cm->num_quads*4*sizeof(uint32), build_buffer, GL_STATIC_DRAW_ARB); | 
						|
   glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0); | 
						|
 | 
						|
   glGenBuffersARB(1, &cm->fbuf); | 
						|
   glBindBufferARB(GL_TEXTURE_BUFFER_ARB, cm->fbuf); | 
						|
   glBufferDataARB(GL_TEXTURE_BUFFER_ARB, cm->num_quads*sizeof(uint32), face_buffer , GL_STATIC_DRAW_ARB); | 
						|
   glBindBufferARB(GL_TEXTURE_BUFFER_ARB, 0); | 
						|
 | 
						|
   glGenTextures(1, &cm->fbuf_tex); | 
						|
   glBindTexture(GL_TEXTURE_BUFFER_ARB, cm->fbuf_tex); | 
						|
   glTexBufferARB(GL_TEXTURE_BUFFER_ARB, GL_RGBA8UI, cm->fbuf); | 
						|
   glBindTexture(GL_TEXTURE_BUFFER_ARB, 0); | 
						|
} | 
						|
 | 
						|
static void upload_mesh_data(raw_mesh *rm) | 
						|
{ | 
						|
   int cx = rm->cx; | 
						|
   int cy = rm->cy; | 
						|
   int slot_x = (cx >> 1) & (CACHED_MESH_NUM_X-1); | 
						|
   int slot_y = (cy >> 1) & (CACHED_MESH_NUM_Y-1); | 
						|
   chunk_mesh *cm; | 
						|
 | 
						|
   free_chunk(slot_x, slot_y); | 
						|
 | 
						|
   cm = &cached_chunk_mesh[slot_y][slot_x]; | 
						|
   cm->num_quads = rm->num_quads; | 
						|
 | 
						|
   upload_mesh(cm, rm->build_buffer, rm->face_buffer); | 
						|
   cm->vbuf_size = rm->num_quads*4*sizeof(uint32); | 
						|
   cm->fbuf_size = rm->num_quads*sizeof(uint32); | 
						|
   cm->priority = 100000; | 
						|
   cm->chunk_x = cx; | 
						|
   cm->chunk_y = cy; | 
						|
 | 
						|
   memcpy(cm->bounds, rm->bounds, sizeof(cm->bounds)); | 
						|
   memcpy(cm->transform, rm->transform, sizeof(cm->transform)); | 
						|
 | 
						|
   // write barrier here | 
						|
   cm->state = STATE_valid; | 
						|
} | 
						|
 | 
						|
GLint uniform_loc[16]; | 
						|
float table3[128][3]; | 
						|
float table4[64][4]; | 
						|
GLint tablei[2]; | 
						|
 | 
						|
float step=0; | 
						|
 | 
						|
#ifdef SHORTVIEW | 
						|
int view_dist_in_chunks = 50; | 
						|
#else | 
						|
int view_dist_in_chunks = 80; | 
						|
#endif | 
						|
 | 
						|
void setup_uniforms(float pos[3]) | 
						|
{ | 
						|
   int i,j; | 
						|
   step += 1.0f/60.0f; | 
						|
   for (i=0; i < STBVOX_UNIFORM_count; ++i) { | 
						|
      stbvox_uniform_info raw, *ui=&raw; | 
						|
      stbvox_get_uniform_info(&raw, i); | 
						|
      uniform_loc[i] = -1; | 
						|
 | 
						|
      if (i == STBVOX_UNIFORM_texscale || i == STBVOX_UNIFORM_texgen || i == STBVOX_UNIFORM_color_table) | 
						|
         continue; | 
						|
 | 
						|
      if (ui) { | 
						|
         void *data = ui->default_value; | 
						|
         uniform_loc[i] = stbgl_find_uniform(main_prog, ui->name); | 
						|
        switch (i) { | 
						|
            case STBVOX_UNIFORM_face_data: | 
						|
               tablei[0] = 2; | 
						|
               data = tablei; | 
						|
               break; | 
						|
 | 
						|
            case STBVOX_UNIFORM_tex_array: | 
						|
               glActiveTextureARB(GL_TEXTURE0_ARB); | 
						|
               glBindTexture(GL_TEXTURE_2D_ARRAY_EXT, voxel_tex[0]); | 
						|
               glActiveTextureARB(GL_TEXTURE1_ARB); | 
						|
               glBindTexture(GL_TEXTURE_2D_ARRAY_EXT, voxel_tex[1]); | 
						|
               glActiveTextureARB(GL_TEXTURE0_ARB); | 
						|
               tablei[0] = 0; | 
						|
               tablei[1] = 1; | 
						|
               data = tablei; | 
						|
               break; | 
						|
 | 
						|
            case STBVOX_UNIFORM_color_table: | 
						|
               data = ui->default_value; | 
						|
               ((float *)data)[63*4+3] = 2.0f; // emissive | 
						|
               break; | 
						|
 | 
						|
            case STBVOX_UNIFORM_camera_pos: | 
						|
               data = table3[0]; | 
						|
               table3[0][0] = pos[0]; | 
						|
               table3[0][1] = pos[1]; | 
						|
               table3[0][2] = pos[2]; | 
						|
               table3[0][3] = stb_max(0,(float)sin(step*2)*0.125f); | 
						|
               break; | 
						|
 | 
						|
            case STBVOX_UNIFORM_ambient: { | 
						|
               float bright = 1.0; | 
						|
               //float bright = 0.75; | 
						|
               float amb[3][3]; | 
						|
 | 
						|
               // ambient direction is sky-colored upwards | 
						|
               // "ambient" lighting is from above | 
						|
               table4[0][0] =  0.3f; | 
						|
               table4[0][1] = -0.5f; | 
						|
               table4[0][2] =  0.9f; | 
						|
 | 
						|
               amb[1][0] = 0.3f; amb[1][1] = 0.3f; amb[1][2] = 0.3f; // dark-grey | 
						|
               amb[2][0] = 1.0; amb[2][1] = 1.0; amb[2][2] = 1.0; // white | 
						|
 | 
						|
               // convert so (table[1]*dot+table[2]) gives | 
						|
               // above interpolation | 
						|
               //     lerp((dot+1)/2, amb[1], amb[2]) | 
						|
               //     amb[1] + (amb[2] - amb[1]) * (dot+1)/2 | 
						|
               //     amb[1] + (amb[2] - amb[1]) * dot/2 + (amb[2]-amb[1])/2 | 
						|
 | 
						|
               for (j=0; j < 3; ++j) { | 
						|
                  table4[1][j] = (amb[2][j] - amb[1][j])/2 * bright; | 
						|
                  table4[2][j] = (amb[1][j] + amb[2][j])/2 * bright; | 
						|
               } | 
						|
 | 
						|
               // fog color | 
						|
               table4[3][0] = 0.6f, table4[3][1] = 0.7f, table4[3][2] = 0.9f; | 
						|
               table4[3][3] = 1.0f / (view_dist_in_chunks * 16); | 
						|
               table4[3][3] *= table4[3][3]; | 
						|
 | 
						|
               data = table4; | 
						|
               break; | 
						|
            } | 
						|
         } | 
						|
 | 
						|
         switch (ui->type) { | 
						|
            case STBVOX_UNIFORM_TYPE_sampler: stbglUniform1iv(uniform_loc[i], ui->array_length, data); break; | 
						|
            case STBVOX_UNIFORM_TYPE_vec2:    stbglUniform2fv(uniform_loc[i], ui->array_length, data); break; | 
						|
            case STBVOX_UNIFORM_TYPE_vec3:    stbglUniform3fv(uniform_loc[i], ui->array_length, data); break; | 
						|
            case STBVOX_UNIFORM_TYPE_vec4:    stbglUniform4fv(uniform_loc[i], ui->array_length, data); break; | 
						|
         } | 
						|
      } | 
						|
   } | 
						|
} | 
						|
 | 
						|
GLuint unitex[64], unibuf[64]; | 
						|
void make_texture_buffer_for_uniform(int uniform, int slot) | 
						|
{ | 
						|
   GLenum type; | 
						|
   stbvox_uniform_info raw, *ui=&raw; | 
						|
   GLint uloc; | 
						|
    | 
						|
   stbvox_get_uniform_info(ui, uniform); | 
						|
   uloc = stbgl_find_uniform(main_prog, ui->name); | 
						|
 | 
						|
   if (uniform == STBVOX_UNIFORM_color_table) | 
						|
      ((float *)ui->default_value)[63*4+3] = 2.0f; // emissive | 
						|
 | 
						|
   glGenBuffersARB(1, &unibuf[uniform]); | 
						|
   glBindBufferARB(GL_ARRAY_BUFFER_ARB, unibuf[uniform]); | 
						|
   glBufferDataARB(GL_ARRAY_BUFFER_ARB, ui->array_length * ui->bytes_per_element, ui->default_value, GL_STATIC_DRAW_ARB); | 
						|
   glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0); | 
						|
 | 
						|
   glGenTextures(1, &unitex[uniform]); | 
						|
   glBindTexture(GL_TEXTURE_BUFFER_ARB, unitex[uniform]); | 
						|
   switch (ui->type) { | 
						|
      case STBVOX_UNIFORM_TYPE_vec2: type = GL_RG32F; break; | 
						|
      case STBVOX_UNIFORM_TYPE_vec3: type = GL_RGB32F; break; | 
						|
      case STBVOX_UNIFORM_TYPE_vec4: type = GL_RGBA32F; break; | 
						|
      default: assert(0); | 
						|
   } | 
						|
   glTexBufferARB(GL_TEXTURE_BUFFER_ARB, type, unibuf[uniform]); | 
						|
   glBindTexture(GL_TEXTURE_BUFFER_ARB, 0); | 
						|
 | 
						|
   glActiveTextureARB(GL_TEXTURE0 + slot); | 
						|
   glBindTexture(GL_TEXTURE_BUFFER_ARB, unitex[uniform]); | 
						|
   glActiveTextureARB(GL_TEXTURE0); | 
						|
 | 
						|
   stbglUseProgram(main_prog); | 
						|
   stbglUniform1i(uloc, slot); | 
						|
} | 
						|
 | 
						|
#define MAX_MESH_WORKERS  8 | 
						|
#define MAX_CHUNK_LOAD_WORKERS 2 | 
						|
 | 
						|
int num_mesh_workers; | 
						|
int num_chunk_load_workers; | 
						|
 | 
						|
typedef struct | 
						|
{ | 
						|
   int state; | 
						|
   int request_cx; | 
						|
   int request_cy; | 
						|
   int padding[13]; | 
						|
 | 
						|
   SDL_sem * request_received; | 
						|
 | 
						|
   SDL_sem * chunk_server_done_processing; | 
						|
   int chunk_action; | 
						|
   int chunk_request_x; | 
						|
   int chunk_request_y; | 
						|
   fast_chunk *chunks[4][4]; | 
						|
 | 
						|
   int padding2[16]; | 
						|
   raw_mesh rm; | 
						|
   int padding3[16]; | 
						|
 | 
						|
   uint8 *build_buffer; | 
						|
   uint8 *face_buffer ; | 
						|
} mesh_worker; | 
						|
 | 
						|
enum | 
						|
{ | 
						|
   WSTATE_idle, | 
						|
   WSTATE_requested, | 
						|
   WSTATE_running, | 
						|
   WSTATE_mesh_ready, | 
						|
}; | 
						|
 | 
						|
mesh_worker mesh_data[MAX_MESH_WORKERS]; | 
						|
int num_meshes_started; // stats | 
						|
 | 
						|
int request_chunk(int chunk_x, int chunk_y); | 
						|
void update_meshes_from_render_thread(void); | 
						|
 | 
						|
unsigned char tex2_data[64][4]; | 
						|
 | 
						|
void init_tex2_gradient(void) | 
						|
{ | 
						|
   int i; | 
						|
   for (i=0; i < 16; ++i) { | 
						|
      tex2_data[i+ 0][0] = 64 + 12*i; | 
						|
      tex2_data[i+ 0][1] = 32; | 
						|
      tex2_data[i+ 0][2] = 64; | 
						|
 | 
						|
      tex2_data[i+16][0] = 255; | 
						|
      tex2_data[i+16][1] = 32 + 8*i; | 
						|
      tex2_data[i+16][2] = 64; | 
						|
 | 
						|
      tex2_data[i+32][0] = 255; | 
						|
      tex2_data[i+32][1] = 160; | 
						|
      tex2_data[i+32][2] = 64 + 12*i; | 
						|
 | 
						|
      tex2_data[i+48][0] = 255; | 
						|
      tex2_data[i+48][1] = 160 + 6*i; | 
						|
      tex2_data[i+48][2] = 255; | 
						|
   } | 
						|
} | 
						|
 | 
						|
void set_tex2_alpha(float fa) | 
						|
{ | 
						|
   int i; | 
						|
   int a = (int) stb_lerp(fa, 0, 255); | 
						|
   if (a < 0) a = 0; else if (a > 255) a = 255; | 
						|
   glBindTexture(GL_TEXTURE_2D_ARRAY_EXT, voxel_tex[1]); | 
						|
   for (i=0; i < 64; ++i) { | 
						|
      tex2_data[i][3] = a; | 
						|
      glTexSubImage3DEXT(GL_TEXTURE_2D_ARRAY_EXT, 0, 0,0,i, 1,1,1, GL_RGBA, GL_UNSIGNED_BYTE, tex2_data[i]); | 
						|
   } | 
						|
} | 
						|
 | 
						|
void render_init(void) | 
						|
{ | 
						|
   int i; | 
						|
   char *binds[] = { "attr_vertex", "attr_face", NULL }; | 
						|
   char *vertex; | 
						|
   char *fragment; | 
						|
   int w=0,h=0; | 
						|
 | 
						|
   unsigned char *texdata = stbi_load("terrain.png", &w, &h, NULL, 4); | 
						|
 | 
						|
   stbvox_init_mesh_maker(&g_mesh_maker); | 
						|
   for (i=0; i < num_mesh_workers; ++i) { | 
						|
      stbvox_init_mesh_maker(&mesh_data[i].rm.mm); | 
						|
   } | 
						|
 | 
						|
   vertex = stbvox_get_vertex_shader(); | 
						|
   fragment = stbvox_get_fragment_shader(); | 
						|
 | 
						|
   { | 
						|
      char error_buffer[1024]; | 
						|
      char *main_vertex[] = { vertex, NULL }; | 
						|
      char *main_fragment[] = { fragment, NULL }; | 
						|
      main_prog = stbgl_create_program(main_vertex, main_fragment, binds, error_buffer, sizeof(error_buffer)); | 
						|
      if (main_prog == 0) { | 
						|
         ods("Compile error for main shader: %s\n", error_buffer); | 
						|
         assert(0); | 
						|
         exit(1); | 
						|
      } | 
						|
   } | 
						|
   //init_index_buffer(); | 
						|
 | 
						|
   make_texture_buffer_for_uniform(STBVOX_UNIFORM_texscale     , 3); | 
						|
   make_texture_buffer_for_uniform(STBVOX_UNIFORM_texgen       , 4); | 
						|
   make_texture_buffer_for_uniform(STBVOX_UNIFORM_color_table  , 5); | 
						|
 | 
						|
   glGenTextures(2, voxel_tex); | 
						|
 | 
						|
   glBindTexture(GL_TEXTURE_2D_ARRAY_EXT, voxel_tex[0]); | 
						|
   glTexImage3DEXT(GL_TEXTURE_2D_ARRAY_EXT, 0, GL_RGBA, | 
						|
                      TEX_SIZE,TEX_SIZE,256, | 
						|
                      0,GL_RGBA,GL_UNSIGNED_BYTE,NULL); | 
						|
   for (i=0; i < 256; ++i) { | 
						|
      if (texdata) | 
						|
         scale_texture(texdata, (i&15)*w/16, (h/16)*(i>>4), w,h); | 
						|
      else | 
						|
         build_base_texture(i); | 
						|
      glTexSubImage3DEXT(GL_TEXTURE_2D_ARRAY_EXT, 0, 0,0,i, TEX_SIZE,TEX_SIZE,1, GL_RGBA, GL_UNSIGNED_BYTE, texture[0]); | 
						|
   } | 
						|
   glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR); | 
						|
   glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_MAG_FILTER, GL_LINEAR); | 
						|
   glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_MAX_ANISOTROPY_EXT, 16); | 
						|
   #ifdef STBVOX_CONFIG_TEX1_EDGE_CLAMP | 
						|
   glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); | 
						|
   glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); | 
						|
   #endif | 
						|
 | 
						|
   glGenerateMipmapEXT(GL_TEXTURE_2D_ARRAY_EXT); | 
						|
 | 
						|
   glBindTexture(GL_TEXTURE_2D_ARRAY_EXT, voxel_tex[1]); | 
						|
   glTexImage3DEXT(GL_TEXTURE_2D_ARRAY_EXT, 0, GL_RGBA, | 
						|
                      1,1,64, | 
						|
                      0,GL_RGBA,GL_UNSIGNED_BYTE,NULL); | 
						|
   init_tex2_gradient(); | 
						|
   set_tex2_alpha(0.0); | 
						|
   #if 0 | 
						|
   for (i=0; i < 128; ++i) { | 
						|
      //build_overlay_texture(i); | 
						|
      glTexSubImage3DEXT(GL_TEXTURE_2D_ARRAY_EXT, 0, 0,0,i, TEX_SIZE,TEX_SIZE,1, GL_RGBA, GL_UNSIGNED_BYTE, texture[0]); | 
						|
   } | 
						|
   #endif | 
						|
   glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR); | 
						|
   glTexParameteri(GL_TEXTURE_2D_ARRAY_EXT, GL_TEXTURE_MAG_FILTER, GL_LINEAR); | 
						|
   glGenerateMipmapEXT(GL_TEXTURE_2D_ARRAY_EXT); | 
						|
} | 
						|
 | 
						|
void world_init(void) | 
						|
{ | 
						|
   int a,b,x,y; | 
						|
 | 
						|
   Uint64 start_time, end_time; | 
						|
   #ifdef NDEBUG | 
						|
   int range = 32; | 
						|
   #else | 
						|
   int range = 12; | 
						|
   #endif | 
						|
 | 
						|
   start_time = SDL_GetPerformanceCounter(); | 
						|
 | 
						|
   // iterate in 8x8 clusters of qchunks at a time to get better converted-chunk-cache reuse | 
						|
   // than a purely row-by-row ordering is (single-threaded this is a bigger win than | 
						|
   // any of the above optimizations were, since it halves zlib/mc-conversion costs) | 
						|
   for (x=-range; x <= range; x += 16) | 
						|
      for (y=-range; y <= range; y += 16) | 
						|
         for (b=y; b < y+16 && b <= range; b += 2) | 
						|
            for (a=x; a < x+16 && a <= range; a += 2) | 
						|
               while (!request_chunk(a, b)) { // if request fails, all threads are busy | 
						|
                  update_meshes_from_render_thread(); | 
						|
                  SDL_Delay(1); | 
						|
               } | 
						|
 | 
						|
   // wait until all the workers are done, | 
						|
   // (this is only needed if we want to time | 
						|
   // when the build finishes, or when we want to reset the | 
						|
   // cache size; otherwise we could just go ahead and | 
						|
   // start rendering whatever we've got) | 
						|
   for(;;) { | 
						|
      int i; | 
						|
      update_meshes_from_render_thread(); | 
						|
      for (i=0; i < num_mesh_workers; ++i) | 
						|
         if (mesh_data[i].state != WSTATE_idle) | 
						|
            break; | 
						|
      if (i == num_mesh_workers) | 
						|
         break; | 
						|
      SDL_Delay(3); | 
						|
   } | 
						|
 | 
						|
   end_time = SDL_GetPerformanceCounter(); | 
						|
   ods("Build time: %7.2fs\n", (end_time - start_time) / (float) SDL_GetPerformanceFrequency()); | 
						|
 | 
						|
   // don't waste lots of storage on chunk caches once it's finished starting-up; | 
						|
   // this was only needed to be this large because we worked in large blocks | 
						|
   // to maximize sharing | 
						|
   reset_cache_size(32); | 
						|
} | 
						|
 | 
						|
extern SDL_mutex * chunk_cache_mutex; | 
						|
 | 
						|
int mesh_worker_handler(void *data) | 
						|
{ | 
						|
   mesh_worker *mw = data; | 
						|
   mw->face_buffer = malloc(FACE_BUFFER_SIZE); | 
						|
   mw->build_buffer = malloc(BUILD_BUFFER_SIZE); | 
						|
 | 
						|
   // this loop only works because the compiler can't | 
						|
   // tell that the SDL_calls don't access mw->state; | 
						|
   // really we should barrier that stuff | 
						|
   for(;;) { | 
						|
      int i,j; | 
						|
      int cx,cy; | 
						|
 | 
						|
      // wait for a chunk request | 
						|
      SDL_SemWait(mw->request_received); | 
						|
 | 
						|
      // analyze the chunk request | 
						|
      assert(mw->state == WSTATE_requested); | 
						|
      cx = mw->request_cx; | 
						|
      cy = mw->request_cy; | 
						|
 | 
						|
      // this is inaccurate as it can block while another thread has the cache locked | 
						|
      mw->state = WSTATE_running; | 
						|
 | 
						|
      // get the chunks we need (this takes a lock and caches them) | 
						|
      for (j=0; j < 4; ++j) | 
						|
         for (i=0; i < 4; ++i) | 
						|
            mw->chunks[j][i] = get_converted_fastchunk(cx-1 + i, cy-1 + j); | 
						|
 | 
						|
      // build the mesh based on the chunks | 
						|
      mw->rm.build_buffer = mw->build_buffer; | 
						|
      mw->rm.face_buffer = mw->face_buffer; | 
						|
      build_chunk(cx, cy, mw->chunks, &mw->rm); | 
						|
      mw->state = WSTATE_mesh_ready; | 
						|
      // don't need to notify of this, because it gets polled | 
						|
 | 
						|
      // when done, free the chunks | 
						|
 | 
						|
      // for efficiency we just take the mutex once around the whole thing, | 
						|
      // though this spreads the mutex logic over two files | 
						|
      SDL_LockMutex(chunk_cache_mutex); | 
						|
      for (j=0; j < 4; ++j) | 
						|
         for (i=0; i < 4; ++i) { | 
						|
            deref_fastchunk(mw->chunks[j][i]); | 
						|
            mw->chunks[j][i] = NULL; | 
						|
         } | 
						|
      SDL_UnlockMutex(chunk_cache_mutex); | 
						|
   } | 
						|
   return 0; | 
						|
} | 
						|
 | 
						|
int request_chunk(int chunk_x, int chunk_y) | 
						|
{ | 
						|
   int i; | 
						|
   for (i=0; i < num_mesh_workers; ++i) { | 
						|
      mesh_worker *mw = &mesh_data[i]; | 
						|
      if (mw->state == WSTATE_idle) { | 
						|
         mw->request_cx = chunk_x; | 
						|
         mw->request_cy = chunk_y; | 
						|
         mw->state = WSTATE_requested; | 
						|
         SDL_SemPost(mw->request_received); | 
						|
         ++num_meshes_started; | 
						|
         return 1; | 
						|
      } | 
						|
   } | 
						|
   return 0; | 
						|
} | 
						|
 | 
						|
void prepare_threads(void) | 
						|
{ | 
						|
   int i; | 
						|
   int num_proc = SDL_GetCPUCount(); | 
						|
 | 
						|
   if (num_proc > 6) | 
						|
      num_mesh_workers = num_proc/2; | 
						|
   else if (num_proc > 4) | 
						|
      num_mesh_workers = 4; | 
						|
   else  | 
						|
      num_mesh_workers = num_proc-1; | 
						|
 | 
						|
// @TODO | 
						|
//   Thread usage is probably pretty terrible; need to make a | 
						|
//   separate queue of needed chunks, instead of just generating | 
						|
//   one request per thread per frame, and a separate queue of | 
						|
//   results. (E.g. If it takes 1.5 frames to build mesh, thread | 
						|
//   is idle for 0.5 frames.) To fake this for now, I've just | 
						|
//   doubled the number of threads to let those serve as a 'queue', | 
						|
//   but that's dumb. | 
						|
 | 
						|
   num_mesh_workers *= 2; // try to get better thread usage | 
						|
 | 
						|
   if (num_mesh_workers > MAX_MESH_WORKERS) | 
						|
      num_mesh_workers = MAX_MESH_WORKERS; | 
						|
 | 
						|
   for (i=0; i < num_mesh_workers; ++i) { | 
						|
      mesh_worker *data = &mesh_data[i]; | 
						|
      data->request_received = SDL_CreateSemaphore(0); | 
						|
      data->chunk_server_done_processing = SDL_CreateSemaphore(0); | 
						|
      SDL_CreateThread(mesh_worker_handler, "mesh worker", data); | 
						|
   } | 
						|
} | 
						|
 | 
						|
 | 
						|
// "better" buffer uploading | 
						|
#if 0 | 
						|
   if (glBufferStorage) { | 
						|
      glDeleteBuffersARB(1, &vb->vbuf); | 
						|
      glGenBuffersARB(1, &vb->vbuf); | 
						|
 | 
						|
      glBindBufferARB(GL_ARRAY_BUFFER_ARB, vb->vbuf); | 
						|
      glBufferStorage(GL_ARRAY_BUFFER_ARB, sizeof(build_buffer), build_buffer, 0); | 
						|
      glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0); | 
						|
   } else { | 
						|
      glBindBufferARB(GL_ARRAY_BUFFER_ARB, vb->vbuf); | 
						|
      glBufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(build_buffer), build_buffer, GL_STATIC_DRAW_ARB); | 
						|
      glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0); | 
						|
   } | 
						|
#endif | 
						|
 | 
						|
 | 
						|
typedef struct | 
						|
{ | 
						|
   float x,y,z,w; | 
						|
} plane; | 
						|
 | 
						|
static plane frustum[6]; | 
						|
 | 
						|
static void matd_mul(double out[4][4], double src1[4][4], double src2[4][4]) | 
						|
{ | 
						|
   int i,j,k; | 
						|
   for (j=0; j < 4; ++j) { | 
						|
      for (i=0; i < 4; ++i) { | 
						|
         double t=0; | 
						|
         for (k=0; k < 4; ++k) | 
						|
            t += src1[k][i] * src2[j][k]; | 
						|
         out[i][j] = t; | 
						|
      } | 
						|
   } | 
						|
} | 
						|
 | 
						|
// https://fgiesen.wordpress.com/2012/08/31/frustum-planes-from-the-projection-matrix/ | 
						|
static void compute_frustum(void) | 
						|
{ | 
						|
   int i; | 
						|
   GLdouble mv[4][4],proj[4][4], mvproj[4][4]; | 
						|
   glGetDoublev(GL_MODELVIEW_MATRIX , mv[0]); | 
						|
   glGetDoublev(GL_PROJECTION_MATRIX, proj[0]); | 
						|
   matd_mul(mvproj, proj, mv); | 
						|
   for (i=0; i < 4; ++i) { | 
						|
      (&frustum[0].x)[i] = (float) (mvproj[3][i] + mvproj[0][i]); | 
						|
      (&frustum[1].x)[i] = (float) (mvproj[3][i] - mvproj[0][i]); | 
						|
      (&frustum[2].x)[i] = (float) (mvproj[3][i] + mvproj[1][i]); | 
						|
      (&frustum[3].x)[i] = (float) (mvproj[3][i] - mvproj[1][i]); | 
						|
      (&frustum[4].x)[i] = (float) (mvproj[3][i] + mvproj[2][i]); | 
						|
      (&frustum[5].x)[i] = (float) (mvproj[3][i] - mvproj[2][i]); | 
						|
   }    | 
						|
} | 
						|
 | 
						|
static int test_plane(plane *p, float x0, float y0, float z0, float x1, float y1, float z1) | 
						|
{ | 
						|
   // return false if the box is entirely behind the plane | 
						|
   float d=0; | 
						|
   assert(x0 <= x1 && y0 <= y1 && z0 <= z1); | 
						|
   if (p->x > 0) d += x1*p->x; else d += x0*p->x; | 
						|
   if (p->y > 0) d += y1*p->y; else d += y0*p->y; | 
						|
   if (p->z > 0) d += z1*p->z; else d += z0*p->z; | 
						|
   return d + p->w >= 0; | 
						|
} | 
						|
 | 
						|
static int is_box_in_frustum(float *bmin, float *bmax) | 
						|
{ | 
						|
   int i; | 
						|
   for (i=0; i < 5; ++i) | 
						|
      if (!test_plane(&frustum[i], bmin[0], bmin[1], bmin[2], bmax[0], bmax[1], bmax[2])) | 
						|
         return 0; | 
						|
   return 1; | 
						|
} | 
						|
 | 
						|
float compute_priority(int cx, int cy, float x, float y) | 
						|
{ | 
						|
   float distx, disty, dist2; | 
						|
   distx = (cx*16+8) - x; | 
						|
   disty = (cy*16+8) - y; | 
						|
   dist2 = distx*distx + disty*disty; | 
						|
   return view_dist_in_chunks*view_dist_in_chunks * 16 * 16 - dist2; | 
						|
} | 
						|
 | 
						|
int chunk_locations, chunks_considered, chunks_in_frustum; | 
						|
int quads_considered, quads_rendered; | 
						|
int chunk_storage_rendered, chunk_storage_considered, chunk_storage_total; | 
						|
int update_frustum = 1; | 
						|
 | 
						|
#ifdef SHORTVIEW | 
						|
int max_chunk_storage = 450 << 20; | 
						|
int min_chunk_storage = 350 << 20; | 
						|
#else | 
						|
int max_chunk_storage = 900 << 20; | 
						|
int min_chunk_storage = 800 << 20; | 
						|
#endif | 
						|
 | 
						|
float min_priority = -500; // this really wants to be in unit space, not squared space | 
						|
 | 
						|
int num_meshes_uploaded; | 
						|
 | 
						|
void update_meshes_from_render_thread(void) | 
						|
{ | 
						|
   int i; | 
						|
   for (i=0; i < num_mesh_workers; ++i) { | 
						|
      mesh_worker *mw = &mesh_data[i]; | 
						|
      if (mw->state == WSTATE_mesh_ready) { | 
						|
         upload_mesh_data(&mw->rm); | 
						|
         ++num_meshes_uploaded; | 
						|
         mw->state = WSTATE_idle; | 
						|
      } | 
						|
   } | 
						|
} | 
						|
 | 
						|
extern float tex2_alpha; | 
						|
extern int global_hack; | 
						|
int num_threads_active; | 
						|
float chunk_server_activity; | 
						|
 | 
						|
void render_caves(float campos[3]) | 
						|
{ | 
						|
   float x = campos[0], y = campos[1]; | 
						|
   int qchunk_x, qchunk_y; | 
						|
   int cam_x, cam_y; | 
						|
   int i,j, rad; | 
						|
 | 
						|
   compute_frustum(); | 
						|
 | 
						|
   chunk_locations = chunks_considered = chunks_in_frustum = 0; | 
						|
   quads_considered = quads_rendered = 0; | 
						|
   chunk_storage_total = chunk_storage_considered = chunk_storage_rendered = 0; | 
						|
 | 
						|
   cam_x = (int) floor(x+0.5); | 
						|
   cam_y = (int) floor(y+0.5); | 
						|
 | 
						|
   qchunk_x = (((int) floor(x)+16) >> 5) << 1; | 
						|
   qchunk_y = (((int) floor(y)+16) >> 5) << 1; | 
						|
 | 
						|
   glEnable(GL_ALPHA_TEST); | 
						|
   glAlphaFunc(GL_GREATER, 0.5); | 
						|
 | 
						|
   stbglUseProgram(main_prog); | 
						|
   setup_uniforms(campos); // set uniforms to default values inefficiently | 
						|
   glActiveTextureARB(GL_TEXTURE2_ARB); | 
						|
   stbglEnableVertexAttribArray(0); | 
						|
 | 
						|
   { | 
						|
      float lighting[2][3] = { { campos[0],campos[1],campos[2] }, { 0.75,0.75,0.65f } }; | 
						|
      float bright = 8; | 
						|
      lighting[1][0] *= bright; | 
						|
      lighting[1][1] *= bright; | 
						|
      lighting[1][2] *= bright; | 
						|
      stbglUniform3fv(stbgl_find_uniform(main_prog, "light_source"), 2, lighting[0]); | 
						|
   } | 
						|
 | 
						|
   if (global_hack) | 
						|
      set_tex2_alpha(tex2_alpha); | 
						|
 | 
						|
   num_meshes_uploaded = 0; | 
						|
   update_meshes_from_render_thread(); | 
						|
 | 
						|
   // traverse all in-range chunks and analyze them | 
						|
   for (j=-view_dist_in_chunks; j <= view_dist_in_chunks; j += 2) { | 
						|
      for (i=-view_dist_in_chunks; i <= view_dist_in_chunks; i += 2) { | 
						|
         float priority; | 
						|
         int cx = qchunk_x + i; | 
						|
         int cy = qchunk_y + j; | 
						|
 | 
						|
         priority = compute_priority(cx, cy, x, y); | 
						|
         if (priority >= min_priority) { | 
						|
            int slot_x = (cx>>1) & (CACHED_MESH_NUM_X-1); | 
						|
            int slot_y = (cy>>1) & (CACHED_MESH_NUM_Y-1); | 
						|
            chunk_mesh *cm = &cached_chunk_mesh[slot_y][slot_x]; | 
						|
            ++chunk_locations; | 
						|
            if (cm->state == STATE_valid && priority >= 0) { | 
						|
               // check if chunk pos actually matches | 
						|
               if (cm->chunk_x != cx || cm->chunk_y != cy) { | 
						|
                  // we have a stale chunk we need to recreate | 
						|
                  free_chunk(slot_x, slot_y); // it probably will have already gotten freed, but just in case | 
						|
               } | 
						|
            } | 
						|
            if (cm->state == STATE_invalid) { | 
						|
               cm->chunk_x = cx; | 
						|
               cm->chunk_y = cy; | 
						|
               cm->state = STATE_needed; | 
						|
            } | 
						|
            cm->priority = priority; | 
						|
         } | 
						|
      } | 
						|
   } | 
						|
 | 
						|
   // draw front-to-back | 
						|
   for (rad = 0; rad <= view_dist_in_chunks; rad += 2) { | 
						|
      for (j=-rad; j <= rad; j += 2) { | 
						|
         // if j is +- rad, then iterate i through all values | 
						|
         // if j isn't +-rad, then i should be only -rad & rad | 
						|
         int step = 2; | 
						|
         if (abs(j) != rad) | 
						|
            step = 2*rad; | 
						|
         for (i=-rad; i <= rad; i += step) { | 
						|
            int cx = qchunk_x + i; | 
						|
            int cy = qchunk_y + j; | 
						|
            int slot_x = (cx>>1) & (CACHED_MESH_NUM_X-1); | 
						|
            int slot_y = (cy>>1) & (CACHED_MESH_NUM_Y-1); | 
						|
            chunk_mesh *cm = &cached_chunk_mesh[slot_y][slot_x]; | 
						|
            if (cm->state == STATE_valid && cm->priority >= 0) { | 
						|
               ++chunks_considered; | 
						|
               quads_considered += cm->num_quads; | 
						|
               if (is_box_in_frustum(cm->bounds[0], cm->bounds[1])) { | 
						|
                  ++chunks_in_frustum; | 
						|
 | 
						|
                  // @TODO if in range | 
						|
                  stbglUniform3fv(uniform_loc[STBVOX_UNIFORM_transform], 3, cm->transform[0]); | 
						|
                  glBindBufferARB(GL_ARRAY_BUFFER_ARB, cm->vbuf); | 
						|
                  glVertexAttribIPointer(0, 1, GL_UNSIGNED_INT, 4, (void*) 0); | 
						|
                  glBindTexture(GL_TEXTURE_BUFFER_ARB, cm->fbuf_tex); | 
						|
                  glDrawArrays(GL_QUADS, 0, cm->num_quads*4); | 
						|
                  quads_rendered += cm->num_quads; | 
						|
 | 
						|
                  chunk_storage_rendered += cm->vbuf_size + cm->fbuf_size; | 
						|
               } | 
						|
               chunk_storage_considered += cm->vbuf_size + cm->fbuf_size; | 
						|
            } | 
						|
         } | 
						|
      } | 
						|
   } | 
						|
 | 
						|
   stbglDisableVertexAttribArray(0); | 
						|
   glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0); | 
						|
   glActiveTextureARB(GL_TEXTURE0_ARB); | 
						|
 | 
						|
   stbglUseProgram(0); | 
						|
   num_meshes_started = 0; | 
						|
 | 
						|
   { | 
						|
      #define MAX_QUEUE  8 | 
						|
      float highest_priority[MAX_QUEUE]; | 
						|
      int highest_i[MAX_QUEUE], highest_j[MAX_QUEUE]; | 
						|
      float lowest_priority = view_dist_in_chunks * view_dist_in_chunks * 16 * 16.0f; | 
						|
      int lowest_i = -1, lowest_j = -1; | 
						|
 | 
						|
      for (i=0; i < MAX_QUEUE; ++i) { | 
						|
         highest_priority[i] = min_priority; | 
						|
         highest_i[i] = -1; | 
						|
         highest_j[i] = -1; | 
						|
      } | 
						|
 | 
						|
      for (j=0; j < CACHED_MESH_NUM_Y; ++j) { | 
						|
         for (i=0; i < CACHED_MESH_NUM_X; ++i) { | 
						|
            chunk_mesh *cm = &cached_chunk_mesh[j][i]; | 
						|
            if (cm->state == STATE_valid) { | 
						|
               cm->priority = compute_priority(cm->chunk_x, cm->chunk_y, x, y); | 
						|
               chunk_storage_total += cm->vbuf_size + cm->fbuf_size; | 
						|
               if (cm->priority < lowest_priority) { | 
						|
                  lowest_priority = cm->priority; | 
						|
                  lowest_i = i; | 
						|
                  lowest_j = j; | 
						|
               } | 
						|
            } | 
						|
            if (cm->state == STATE_needed) { | 
						|
               cm->priority = compute_priority(cm->chunk_x, cm->chunk_y, x, y); | 
						|
               if (cm->priority < min_priority) | 
						|
                  cm->state = STATE_invalid; | 
						|
               else if (cm->priority > highest_priority[0]) { | 
						|
                  int k; | 
						|
                  highest_priority[0] = cm->priority; | 
						|
                  highest_i[0] = i; | 
						|
                  highest_j[0] = j; | 
						|
                  // bubble this up to right place | 
						|
                  for (k=0; k < MAX_QUEUE-1; ++k) { | 
						|
                     if (highest_priority[k] > highest_priority[k+1]) { | 
						|
                        highest_priority[k] = highest_priority[k+1]; | 
						|
                        highest_priority[k+1] = cm->priority; | 
						|
                        highest_i[k] = highest_i[k+1]; | 
						|
                        highest_i[k+1] = i; | 
						|
                        highest_j[k] = highest_j[k+1]; | 
						|
                        highest_j[k+1] = j; | 
						|
                     } else { | 
						|
                        break; | 
						|
                     } | 
						|
                  } | 
						|
               } | 
						|
            } | 
						|
         } | 
						|
      } | 
						|
 | 
						|
 | 
						|
      // I couldn't find any straightforward logic that avoids | 
						|
      // the hysteresis problem of continually creating & freeing | 
						|
      // a block on the margin, so I just don't free a block until | 
						|
      // it's out of range, but this doesn't actually correctly | 
						|
      // handle when the cache is too small for the given range | 
						|
      if (chunk_storage_total >= min_chunk_storage && lowest_i >= 0) { | 
						|
         if (cached_chunk_mesh[lowest_j][lowest_i].priority < -1200) // -1000? 0? | 
						|
            free_chunk(lowest_i, lowest_j); | 
						|
      } | 
						|
 | 
						|
      if (chunk_storage_total < max_chunk_storage && highest_i[0] >= 0) { | 
						|
         for (j=MAX_QUEUE-1; j >= 0; --j) { | 
						|
            if (highest_j[0] >= 0) { | 
						|
               chunk_mesh *cm = &cached_chunk_mesh[highest_j[j]][highest_i[j]]; | 
						|
               if (request_chunk(cm->chunk_x, cm->chunk_y)) { | 
						|
                  cm->state = STATE_requested; | 
						|
               } else { | 
						|
                  // if we couldn't queue this one, skip the remainder | 
						|
                  break; | 
						|
               } | 
						|
            } | 
						|
         } | 
						|
      } | 
						|
   } | 
						|
 | 
						|
   update_meshes_from_render_thread(); | 
						|
 | 
						|
   num_threads_active = 0; | 
						|
   for (i=0; i < num_mesh_workers; ++i) { | 
						|
      num_threads_active += (mesh_data[i].state == WSTATE_running); | 
						|
   } | 
						|
}
 | 
						|
 |