您的位置：首页 > 其它

Studying note of GCC-3.4.6 source (39)

2010-04-30 11:01 399 查看

4.1.4. Adjust options according to target

Having returned from c_common_post_options, we continue with process_options. Remember that input_filename accesses the file field of input_location, which tracks the file currently being compiled.

process_options (continue)

4283 input_filename = main_input_filename;
4284
4285 #ifdef OVERRIDE_OPTIONS
4286 /* Some machines may reject certain combinations of options. */
4287 OVERRIDE_OPTIONS;
4288 #endif

If the back-end has special requirements for the target-related options, it needs to define the macro OVERRIDE_OPTIONS, tested above at line 4285, to provide the handler. For the x86 target, the macro is defined as the function below.

1050 void
1051 override_options (void) in i386.c
1052 {
1053 int i;
1054 /* Comes from final.c -- no real reason to change it. */
1055 #define MAX_CODE_ALIGN 16
1056
1057 static struct ptt
1058 {
1059 const struct processor_costs *cost; /* Processor costs */
1060 const int target_enable; /* Target flags to enable. */
1061 const int target_disable; /* Target flags to disable. */
1062 const int align_loop; /* Default alignments. */
1063 const int align_loop_max_skip;
1064 const int align_jump;
1065 const int align_jump_max_skip;
1066 const int align_func;
1067 }
1068 const processor_target_table[PROCESSOR_max] =
1069 {
1070 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1071 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1072 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1073 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1074 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1075 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1076 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1077 {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
1078 };
1079
1080 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1081 static struct pta
1082 {
1083 const char *const name; /* processor name or nickname. */
1084 const enum processor_type processor;
1085 const enum pta_flags
1086 {
1087 PTA_SSE = 1,
1088 PTA_SSE2 = 2,
1089 PTA_SSE3 = 4,
1090 PTA_MMX = 8,
1091 PTA_PREFETCH_SSE = 16,
1092 PTA_3DNOW = 32,
1093 PTA_3DNOW_A = 64,
1094 PTA_64BIT = 128
1095 } flags;
1096 }
1097 const processor_alias_table[] =
1098 {
1099 {"i386", PROCESSOR_I386, 0},
1100 {"i486", PROCESSOR_I486, 0},
1101 {"i586", PROCESSOR_PENTIUM, 0},
1102 {"pentium", PROCESSOR_PENTIUM, 0},
1103 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1104 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1105 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1106 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1107 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1108 {"i686", PROCESSOR_PENTIUMPRO, 0},
1109 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1110 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1111 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1112 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1113 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1114 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1115 | PTA_MMX | PTA_PREFETCH_SSE},
1116 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1117 | PTA_MMX | PTA_PREFETCH_SSE},
1118 {"prescott", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_SSE3
1119 | PTA_MMX | PTA_PREFETCH_SSE},
1120 {"nocona", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1121 | PTA_MMX | PTA_PREFETCH_SSE},
1122 {"k6", PROCESSOR_K6, PTA_MMX},
1123 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1124 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1125 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1126 | PTA_3DNOW_A},
1127 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1128 | PTA_3DNOW | PTA_3DNOW_A},
1129 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1130 | PTA_3DNOW_A | PTA_SSE},
1131 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1132 | PTA_3DNOW_A | PTA_SSE},
1133 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1134 | PTA_3DNOW_A | PTA_SSE},
1135 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1136 | PTA_SSE | PTA_SSE2 },
1137 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1138 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1139 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1140 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1141 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1142 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1143 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1144 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1145 };
1146
1147 int const pta_size = ARRAY_SIZE (processor_alias_table);

Above, processor_target_table and processor_alias_table have their types declared immediately before them, inside the function, so these types can’t be used elsewhere. TARGET_CPU_DEFAULT_NAMES at line 1080 defines the names of the CPUs in the family.

710 #define TARGET_CPU_DEFAULT_NAMES {"i386", "i486", "pentium", "pentium-mmx",\
711 "pentiumpro", "pentium2", "pentium3", \
712 "pentium4", "k6", "k6-2", "k6-3",\
713 "athlon", "athlon-4", "k8", \
714 "pentium-m", "prescott", "nocona"}

At line 1085, pta_flags describes the instruction set extensions (MMX, SSE, SSE2, SSE3, 3DNow!, SSE prefetch, 64-bit) available on a given chip.

override_options (continue)

1149 /* Set the default values for switches whose default depends on TARGET_64BIT
1150 in case they weren't overwritten by command line options. */
1151 if (TARGET_64BIT)
1152 {
1153 if (flag_omit_frame_pointer == 2)
1154 flag_omit_frame_pointer = 1;
1155 if (flag_asynchronous_unwind_tables == 2)
1156 flag_asynchronous_unwind_tables = 1;
1157 if (flag_pcc_struct_return == 2)
1158 flag_pcc_struct_return = 0;
1159 }
1160 else
1161 {
1162 if (flag_omit_frame_pointer == 2)
1163 flag_omit_frame_pointer = 0;
1164 if (flag_asynchronous_unwind_tables == 2)
1165 flag_asynchronous_unwind_tables = 0;
1166 if (flag_pcc_struct_return == 2)
1167 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1168 }
1169
1170 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1171 SUBTARGET_OVERRIDE_OPTIONS;
1172 #endif
1173
1174 if (!ix86_tune_string && ix86_arch_string)
1175 ix86_tune_string = ix86_arch_string;
1176 if (!ix86_tune_string)
1177 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1178 if (!ix86_arch_string)
1179 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1180
1181 if (ix86_cmodel_string != 0)
1182 {
1183 if (!strcmp (ix86_cmodel_string, "small"))
1184 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1185 else if (flag_pic)
1186 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1187 else if (!strcmp (ix86_cmodel_string, "32"))
1188 ix86_cmodel = CM_32;
1189 else if (!strcmp (ix86_cmodel_string, "kernel") && ! flag_pic)
1190 ix86_cmodel = CM_KERNEL;
1191 else if (!strcmp (ix86_cmodel_string, "medium") && ! flag_pic)
1192 ix86_cmodel = CM_MEDIUM;
1193 else if (!strcmp (ix86_cmodel_string, "large") && ! flag_pic)
1194 ix86_cmodel = CM_LARGE;
1195 else
1196 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1197 }
1198 else
1199 {
1200 ix86_cmodel = CM_32;
1201 if (TARGET_64BIT)
1202 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1203 }
1204 if (ix86_asm_string != 0)
1205 {
1206 if (!strcmp (ix86_asm_string, "intel"))
1207 ix86_asm_dialect = ASM_INTEL;
1208 else if (!strcmp (ix86_asm_string, "att"))
1209 ix86_asm_dialect = ASM_ATT;
1210 else
1211 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1212 }
1213 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1214 error ("code model `%s' not supported in the %s bit mode",
1215 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1216 if (ix86_cmodel == CM_LARGE)
1217 sorry ("code model `large' not supported yet");
1218 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1219 sorry ("%i-bit mode not compiled in",
1220 (target_flags & MASK_64BIT) ? 64 : 32);

Above, at line 1167, DEFAULT_PCC_STRUCT_RETURN is defined as 1 for x86 chips. The default chip is selected by TARGET_CPU_DEFAULT at line 1177, which is 0 for 32-bit chips (so it selects the first and most common name, “i386”). SUBTARGET_OVERRIDE_OPTIONS at line 1170 is undefined for x86 chips.
As for the ix86_*_string variables, we have seen that they are assigned in set_target_switch. The details of the corresponding options are given by [6] as below (restricted to the x86 architecture).

-mtune=cpu-type
Tune to cpu-type everything applicable about the generated code, except for the ABI and the set of available instructions. The choices for cpu-type are:
generic Produce code optimized for the most common IA32/AMD64/EM64T processors. If you know the CPU on which your code will run, then you should use the corresponding ‘-mtune’ option instead of ‘-mtune=generic’. But, if you do not know exactly what CPU users of your application will have, then you should use this option.
As new processors are deployed in the marketplace, the behavior of this option will change. Therefore, if you upgrade to a newer version of GCC, the code generated option will change to reflect the processors that were most common when that version of GCC was released.
There is no ‘-march=generic’ option because ‘-march’ indicates the instruction set the compiler can use, and there is no generic instruction set applicable to all processors. In contrast, ‘-mtune’ indicates the processor (or, in this case, collection of processors) for which the code is optimized.
native This selects the CPU to tune for at compilation time by determining the processor type of the compiling machine. Using ‘-mtune=native’ will produce code optimized for the local machine under the constraints of the selected instruction set. Using ‘-march=native’ will enable all instruction subsets supported by the local machine (hence the result might not run on different machines).
i386 Original Intel’s i386 CPU.
i486 Intel’s i486 CPU. (No scheduling is implemented for this chip.)
i586, pentium
Intel Pentium CPU with no MMX support.
pentium-mmx
Intel PentiumMMX CPU based on Pentium core with MMX instruction set support.
pentiumpro
Intel PentiumPro CPU.
i686 Same as generic, but when used as march option, PentiumPro instruction set will be used, so the code will run on all i686 family chips.
pentium2 Intel Pentium2 CPU based on PentiumPro core with MMX instruction set support.
pentium3, pentium3m
Intel Pentium3 CPU based on PentiumPro core with MMX and SSE instruction set support.
pentium-m
Low power version of Intel Pentium3 CPU with MMX, SSE and SSE2 instruction set support.
pentium4, pentium4m
Intel Pentium4 CPU with MMX, SSE and SSE2 instruction set support.
prescott Improved version of Intel Pentium4 CPU with MMX, SSE, SSE2 and SSE3 instruction set support.
nocona Improved version of Intel Pentium4 CPU with 64-bit extensions, MMX, SSE, SSE2 and SSE3 instruction set support.
core2 Intel Core2 CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3 and SSSE3 instruction set support.
k6 AMD K6 CPU with MMX instruction set support.
k6-2, k6-3 Improved versions of AMD K6 CPU with MMX and 3dNOW! Instruction set support.
athlon, athlon-tbird
AMD Athlon CPU with MMX, 3dNOW!, enhanced 3dNOW! and SSE prefetch instructions support.
athlon-4, athlon-xp, athlon-mp
Improved AMD Athlon CPU with MMX, 3dNOW!, enhanced 3dNOW! and full SSE instruction set support.
k8, opteron, athlon64, athlon-fx
AMD K8 core based CPUs with x86-64 instruction set support. (This supersets MMX, SSE, SSE2, 3dNOW!, enhanced 3dNOW! and 64-bit instruction set extensions.)
k8-sse3, opteron-sse3, athlon64-sse3
Improved versions of k8, opteron and athlon64 with SSE3 instruction set support.
amdfam10, barcelona
AMD Family 10h core based CPUs with x86-64 instruction set support. (This supersets MMX, SSE, SSE2, SSE3, SSE4A, 3dNOW!, enhanced 3dNOW!, ABM and 64-bit instruction set extensions.)
winchip-c6
IDT Winchip C6 CPU, dealt in same way as i486 with additional MMX instruction set support.
winchip2 IDT Winchip2 CPU, dealt in same way as i486 with additional MMX and 3dNOW! instruction set support.
c3 Via C3 CPU with MMX and 3dNOW! instruction set support. (No scheduling is implemented for this chip.)
c3-2 Via C3-2 CPU with MMX and SSE instruction set support. (No scheduling is implemented for this chip.)
geode Embedded AMD CPU with MMX and 3dNOW! instruction set support.

While picking a specific cpu-type will schedule things appropriately for that particular chip, the compiler will not generate any code that does not run on the i386 without the ‘-march=cpu-type’ option being used.

-march=cpu-type
Generate instructions for the machine type cpu-type. The choices for cpu-type are the same as for ‘-mtune’. Moreover, specifying ‘-march=cpu-type’ implies ‘-mtune=cpu-type’.

-mcpu=cpu-type
A deprecated synonym for ‘-mtune’.

-masm=dialect
Output asm instructions using selected dialect. Supported choices are ‘intel’ or ‘att’ (the default one). Darwin does not support ‘intel’.

Below ‘-m’ switches are supported in addition to the above on AMD x86-64 processors in 64-bit environments.
-m32 -m64
Generate code for a 32-bit or 64-bit environment. The 32-bit environment sets int, long and pointer to 32 bits and generates code that runs on any i386 system. The 64-bit environment sets int to 32 bits and long and pointer to 64 bits and generates code for AMD’s x86-64 architecture. For darwin only the -m64 option turns off the ‘-fno-pic’ and ‘-mdynamic-no-pic’ options.
-mno-red-zone
Do not use a so called red zone for x86-64 code. The red zone is mandated by the x86-64 ABI, it is a 128-byte area beyond the location of the stack pointer that will not be modified by signal or interrupt handlers and therefore can be used for temporary data without adjusting the stack pointer. The flag ‘-mno-red-zone’ disables this red zone.
-mcmodel=small
Generate code for the small code model: the program and its symbols must be linked in the lower 2 GB of the address space. Pointers are 64 bits. Programs can be statically or dynamically linked. This is the default code model.
-mcmodel=kernel
Generate code for the kernel code model. The kernel runs in the negative 2 GB of the address space. This model has to be used for Linux kernel code.
-mcmodel=medium
Generate code for the medium model: The program is linked in the lower 2 GB of the address space but symbols can be located anywhere in the address space. Programs can be statically or dynamically linked, but building of shared libraries is not supported with the medium model.
-mcmodel=large
Generate code for the large model: This model makes no assumptions about addresses and sizes of sections.

The paragraphs above describe well the meaning of the cmodel values below.

108 enum cmodel { in i386.h
109 CM_32, /* The traditional 32-bit ABI. */
110 CM_SMALL, /* Assumes all code and data fits in the low 31 bits. */
111 CM_KERNEL, /* Assumes all code and data fits in the high 31 bits. */
112 CM_MEDIUM, /* Assumes code fits in the low 31 bits; data unlimited. */
113 CM_LARGE, /* No assumptions. */
114 CM_SMALL_PIC /* Assumes code+data+got/plt fits in a 31 bit region. */
115 };

override_options (continue)

1222 for (i = 0; i < pta_size; i++)
1223 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1224 {
1225 ix86_arch = processor_alias_table[i].processor;
1226 /* Default cpu tuning to the architecture. */
1227 ix86_tune = ix86_arch;
1228 if (processor_alias_table[i].flags & PTA_MMX
1229 && !(target_flags_explicit & MASK_MMX))
1230 target_flags |= MASK_MMX;
1231 if (processor_alias_table[i].flags & PTA_3DNOW
1232 && !(target_flags_explicit & MASK_3DNOW))
1233 target_flags |= MASK_3DNOW;
1234 if (processor_alias_table[i].flags & PTA_3DNOW_A
1235 && !(target_flags_explicit & MASK_3DNOW_A))
1236 target_flags |= MASK_3DNOW_A;
1237 if (processor_alias_table[i].flags & PTA_SSE
1238 && !(target_flags_explicit & MASK_SSE))
1239 target_flags |= MASK_SSE;
1240 if (processor_alias_table[i].flags & PTA_SSE2
1241 && !(target_flags_explicit & MASK_SSE2))
1242 target_flags |= MASK_SSE2;
1243 if (processor_alias_table[i].flags & PTA_SSE3
1244 && !(target_flags_explicit & MASK_SSE3))
1245 target_flags |= MASK_SSE3;
1246 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1247 x86_prefetch_sse = true;
1248 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1249 error ("CPU you selected does not support x86-64 instruction set");
1250 break;
1251 }
1252
1253 if (i == pta_size)
1254 error ("bad value (%s) for -march= switch", ix86_arch_string);
1255
1256 for (i = 0; i < pta_size; i++)
1257 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1258 {
1259 ix86_tune = processor_alias_table[i].processor;
1260 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1261 error ("CPU you selected does not support x86-64 instruction set");
1262
1263 /* Intel CPUs have always interpreted SSE prefetch instructions as
1264 NOPs; so, we can enable SSE prefetch instructions even when
1265 -mtune (rather than -march) points us to a processor that has them.
1266 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1267 higher processors. */
1268 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1269 x86_prefetch_sse = true;
1270 break;
1271 }
1272 if (i == pta_size)
1273 error ("bad value (%s) for -mtune= switch", ix86_tune_string);

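Above, target_flags_explicit is also set in set_target_switch. It records which switches were given explicitly on the command line (whether turned on or off). Thus, for every switch the user did not specify explicitly, the compiler can fill in the option using the knowledge in processor_alias_table.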

override_options (continue)

1275 if (optimize_size)
1276 ix86_cost = &size_cost;
1277 else
1278 ix86_cost = processor_target_table[ix86_tune].cost;
1279 target_flags |= processor_target_table[ix86_tune].target_enable;
1280 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1281
1282 /* Arrange to set up i386_stack_locals for all functions. */
1283 init_machine_status = ix86_init_machine_status;
1284
1285 /* Validate -mregparm= value. */
1286 if (ix86_regparm_string)
1287 {
1288 i = atoi (ix86_regparm_string);
1289 if (i < 0 || i > REGPARM_MAX)
1290 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1291 else
1292 ix86_regparm = i;
1293 }
1294 else
1295 if (TARGET_64BIT)
1296 ix86_regparm = REGPARM_MAX;
1297
1298 /* If the user has provided any of the -malign-* options,
1299 warn and use that value only if -falign-* is not set.
1300 Remove this code in GCC 3.2 or later. */
1301 if (ix86_align_loops_string)
1302 {
1303 warning ("-malign-loops is obsolete, use -falign-loops");
1304 if (align_loops == 0)
1305 {
1306 i = atoi (ix86_align_loops_string);
1307 if (i < 0 || i > MAX_CODE_ALIGN)
1308 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1309 else
1310 align_loops = 1 << i;
1311 }
1312 }
1313
1314 if (ix86_align_jumps_string)
1315 {
1316 warning ("-malign-jumps is obsolete, use -falign-jumps");
1317 if (align_jumps == 0)
1318 {
1319 i = atoi (ix86_align_jumps_string);
1320 if (i < 0 || i > MAX_CODE_ALIGN)
1321 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1322 else
1323 align_jumps = 1 << i;
1324 }
1325 }
1326
1327 if (ix86_align_funcs_string)
1328 {
1329 warning ("-malign-functions is obsolete, use -falign-functions");
1330 if (align_functions == 0)
1331 {
1332 i = atoi (ix86_align_funcs_string);
1333 if (i < 0 || i > MAX_CODE_ALIGN)
1334 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1335 else
1336 align_functions = 1 << i;
1337 }
1338 }
1339
1340 /* Default align_* from the processor table. */
1341 if (align_loops == 0)
1342 {
1343 align_loops = processor_target_table[ix86_tune].align_loop;
1344 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1345 }
1346 if (align_jumps == 0)
1347 {
1348 align_jumps = processor_target_table[ix86_tune].align_jump;
1349 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1350 }
1351 if (align_functions == 0)
1352 {
1353 align_functions = processor_target_table[ix86_tune].align_func;
1354 }
1355
1356 /* Validate -mpreferred-stack-boundary= value, or provide default.
1357 The default of 128 bits is for Pentium III's SSE __m128, but we
1358 don't want additional code to keep the stack aligned when
1359 optimizing for code size. */
1360 ix86_preferred_stack_boundary = (optimize_size
1361 ? TARGET_64BIT ? 128 : 32
1362 : 128);
1363 if (ix86_preferred_stack_boundary_string)
1364 {
1365 i = atoi (ix86_preferred_stack_boundary_string);
1366 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1367 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1368 TARGET_64BIT ? 4 : 2);
1369 else
1370 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1371 }
1372
1373 /* Validate -mbranch-cost= value, or provide default. */
1374 ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1375 if (ix86_branch_cost_string)
1376 {
1377 i = atoi (ix86_branch_cost_string);
1378 if (i < 0 || i > 5)
1379 error ("-mbranch-cost=%d is not between 0 and 5", i);
1380 else
1381 ix86_branch_cost = i;
1382 }
1383
1384 if (ix86_tls_dialect_string)
1385 {
1386 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1387 ix86_tls_dialect = TLS_DIALECT_GNU;
1388 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1389 ix86_tls_dialect = TLS_DIALECT_SUN;
1390 else
1391 error ("bad value (%s) for -mtls-dialect= switch",
1392 ix86_tls_dialect_string);
1393 }

Among the target options referred to in the code above, those available for Intel x86 chips are described below, quoted from [6]. Above, REGPARM_MAX is 3 for 32-bit chips, and MAX_CODE_ALIGN is 16, as defined at the beginning of the function.

-mregparm=num
Control how many registers are used to pass integer arguments. By default, no registers are used to pass arguments, and at most 3 registers can be used. You can control this behavior for a specific function by using the function attribute ‘regparm’.
Warning: if you use this switch, and num is nonzero, then you must build all modules with the same value, including any libraries. This includes the system libraries and startup modules.
-mpreferred-stack-boundary=num
Attempt to keep the stack boundary aligned to a 2 raised to num byte boundary. If ‘-mpreferred-stack-boundary’ is not specified, the default is 4 (16 bytes or 128 bits).
On Pentium and PentiumPro, double and long double values should be aligned to an 8 byte boundary (see ‘-malign-double’) or suffer significant run time performance penalties. On Pentium III, the Streaming SIMD Extension (SSE) data type __m128 may not work properly if it is not 16 byte aligned.
To ensure proper alignment of this value on the stack, the stack boundary must be as aligned as that required by any value stored on the stack. Further, every function must be generated such that it keeps the stack aligned. Thus calling a function compiled with a higher preferred stack boundary from a function compiled with a lower preferred stack boundary will most likely misalign the stack. It is recommended that libraries that use callbacks always use the default setting.
This extra alignment does consume extra stack space, and generally increases code size. Code that is sensitive to stack space usage, such as embedded systems and operating system kernels, may want to reduce the preferred alignment to ‘-mpreferred-stack-boundary=2’.

override_options (continue)

1395 /* Keep nonleaf frame pointers. */
1396 if (TARGET_OMIT_LEAF_FRAME_POINTER)
1397 flag_omit_frame_pointer = 1;
1398
1399 /* If we're doing fast math, we don't care about comparison order
1400 wrt NaNs. This lets us use a shorter comparison sequence. */
1401 if (flag_unsafe_math_optimizations)
1402 target_flags &= ~MASK_IEEE_FP;
1403
1404 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1405 since the insns won't need emulation. */
1406 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1407 target_flags &= ~MASK_NO_FANCY_MATH_387;
1408
1409 /* Turn on SSE2 builtins for -msse3. */
1410 if (TARGET_SSE3)
1411 target_flags |= MASK_SSE2;
1412
1413 /* Turn on SSE builtins for -msse2. */
1414 if (TARGET_SSE2)
1415 target_flags |= MASK_SSE;
1416
1417 if (TARGET_64BIT)
1418 {
1419 if (TARGET_ALIGN_DOUBLE)
1420 error ("-malign-double makes no sense in the 64bit mode");
1421 if (TARGET_RTD)
1422 error ("-mrtd calling convention not supported in the 64bit mode");
1423 /* Enable by default the SSE and MMX builtins. */
1424 target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1425 ix86_fpmath = FPMATH_SSE;
1426 }
1427 else
1428 {
1429 ix86_fpmath = FPMATH_387;
1430 /* i386 ABI does not specify red zone. It still makes sense to use it
1431 when programmer takes care to stack from being destroyed. */
1432 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1433 target_flags |= MASK_NO_RED_ZONE;
1434 }
1435
1436 if (ix86_fpmath_string != 0)
1437 {
1438 if (! strcmp (ix86_fpmath_string, "387"))
1439 ix86_fpmath = FPMATH_387;
1440 else if (! strcmp (ix86_fpmath_string, "sse"))
1441 {
1442 if (!TARGET_SSE)
1443 {
1444 warning ("SSE instruction set disabled, using 387 arithmetics");
1445 ix86_fpmath = FPMATH_387;
1446 }
1447 else
1448 ix86_fpmath = FPMATH_SSE;
1449 }
1450 else if (! strcmp (ix86_fpmath_string, "387,sse")
1451 || ! strcmp (ix86_fpmath_string, "sse,387"))
1452 {
1453 if (!TARGET_SSE)
1454 {
1455 warning ("SSE instruction set disabled, using 387 arithmetics");
1456 ix86_fpmath = FPMATH_387;
1457 }
1458 else if (!TARGET_80387)
1459 {
1460 warning ("387 instruction set disabled, using SSE arithmetics");
1461 ix86_fpmath = FPMATH_SSE;
1462 }
1463 else
1464 ix86_fpmath = FPMATH_SSE | FPMATH_387;
1465 }
1466 else
1467 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1468 }

Above, ix86_fpmath_string holds the argument of the following option [6]:

-mfpmath=unit
Generate floating point arithmetics for selected unit. The choices for unit
are:
387 Use the standard 387 floating point coprocessor present majority of chips and emulated otherwise. Code compiled with this option will run almost everywhere. The temporary results are computed in 80bit precision instead of precision specified by the type resulting in slightly different results compared to most of other chips. See ‘-ffloat-store’ for more detailed description. This is the default choice for i386 compiler.
sse Use scalar floating point instructions present in the SSE instruction set. This instruction set is supported by Pentium3 and newer chips, in the AMD line by Athlon-4, Athlon-xp and Athlon-mp chips. The earlier version of SSE instruction set supports only single precision arithmetics, thus the double and extended precision arithmetics is still done using 387. Later version, present only in Pentium4 and the future AMD x86-64 chips supports double precision arithmetics too.
For the i386 compiler, you need to use ‘-march=cpu-type’, ‘-msse’ or ‘-msse2’ switches to enable SSE extensions and make this option effective. For the x86-64 compiler, these extensions are enabled by default.
The resulting code should be considerably faster in the majority of cases and avoid the numerical instability problems of 387 code, but may break some existing code that expects temporaries to be 80bit. This is the default choice for the x86-64 compiler.
sse,387 Attempt to utilize both instruction sets at once. This effectively double the amount of available registers and on chips with separate execution units for 387 and SSE the execution resources too. Use this option with care, as it is still experimental, because the GCC register allocator does not model separate functional units well resulting in instable performance.

override_options (continue)

1470 /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1471 on by -msse. */
1472 if (TARGET_SSE)
1473 {
1474 target_flags |= MASK_MMX;
1475 x86_prefetch_sse = true;
1476 }
1477
1478 /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
1479 if (TARGET_3DNOW)
1480 {
1481 target_flags |= MASK_MMX;
1482 /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1483 extensions it adds. */
1484 if (x86_3dnow_a & (1 << ix86_arch))
1485 target_flags |= MASK_3DNOW_A;
1486 }
1487 if ((x86_accumulate_outgoing_args & TUNEMASK)
1488 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1489 && !optimize_size)
1490 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1491
1492 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
1493 {
1494 char *p;
1495 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1496 p = strchr (internal_label_prefix, 'X');
1497 internal_label_prefix_len = p - internal_label_prefix;
1498 *p = '\0';
1499 }
1500 }

Above at line 1487, x86_accumulate_outgoing_args is defined as:

507 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO; in i386.c

In it, m_ATHLON_K8, for example, is defined as below.

470 #define m_K8 (1<<PROCESSOR_K8) in i386.c
471 #define m_ATHLON_K8 (m_K8 | m_ATHLON)

PROCESSOR_K8 is one of the values of enum processor_type. Evidently, variables like x86_accumulate_outgoing_args are bitmasks that name the chips for which a given characteristic is enabled.
At line 1495, in Linux, ASM_GENERATE_INTERNAL_LABEL is defined as:

213 #undef ASM_GENERATE_INTERNAL_LABEL in linux.h
214 #define ASM_GENERATE_INTERNAL_LABEL(LABEL,PREFIX,NUM) \
215 sprintf (LABEL, "*.L%s%ld", PREFIX, (long)(NUM))

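So the sprintf first writes “*.LLX0” into internal_label_prefix. Line 1496 then locates the ‘X’, line 1497 records its offset (4) as internal_label_prefix_len, and line 1498 overwrites the ‘X’ with the terminating NUL character, leaving internal_label_prefix as “*.LL”.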

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航